From 7463e50e328b620487cd1e63f0f17e177c8b37e0 Mon Sep 17 00:00:00 2001 From: "Matthew R. Ochs" Date: Mon, 6 Oct 2025 09:06:12 -0700 Subject: [PATCH 01/35] NVIDIA: SAUCE: Apply packaging changes for NVIDIA QEMU debian/changelog: - Updated version to 10.1.0+nvidia1 debian/control: debian/control-in: - Added dependency support for meson-1.5 (required by v10.1) - Added NVIDIA as maintainer debian/rules: - Removed pvrdma and cris/nios2 architectures (removed since v8.2) - Disabled firmware builds debian/qemu-system-common.install: - Remove obsolete files (removed since v8.2) - Added hw-uefi-vars.so (added since v8.2) Signed-off-by: Matthew R. Ochs --- debian/changelog | 6 +++++ debian/control | 24 +++++++++-------- debian/control-in | 22 ++++++++------- debian/qemu-system-common.install | 9 ++++--- debian/rules | 45 +++++++++++++++---------------- 5 files changed, 59 insertions(+), 47 deletions(-) diff --git a/debian/changelog b/debian/changelog index b9a6df97bb..5142e32d18 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +qemu (1:10.1.0+nvidia1) noble; urgency=medium + + * Update to QEMU 10.1.0 upstream with NVIDIA support + + -- Matthew R. Ochs Tue, 07 Oct 2025 09:16:29 -0700 + qemu (1:8.2.2+ds-0ubuntu1) noble; urgency=medium * Merge version 8.2.2 from upstream. (LP: #2061005). 
Cherry-picks from diff --git a/debian/control b/debian/control index 133fddace2..28d67bead1 100644 --- a/debian/control +++ b/debian/control @@ -2,12 +2,12 @@ Source: qemu Section: otherosfs Priority: optional -Maintainer: Ubuntu Developers +Maintainer: NVIDIA BaseOS Team XSBC-Original-Maintainer: Debian QEMU Team Uploaders: Michael Tokarev Build-Depends: debhelper-compat (= 13), python3:any, - meson (>> 0.63.0~), ninja-build, + meson (>> 0.63.0~) | meson-1.5, ninja-build, flex, bison, Build-Depends-Arch: # In comments below we also specify (system-specific) arguments @@ -127,25 +127,27 @@ Build-Depends-Arch: # various firmware files (kvmvapic.bin &Co), older qemu-system-data should work #XXX-cyclic-test-dep-dak-bug qemu-system-data [amd64 arm arm64 armel armhf i386 mips mips64 mips64el mipsel powerpc powerpcspe ppc64 ppc64el riscv64 s390x sparc sparc64 x32] , Build-Depends-Indep: +# Firmware build dependencies disabled since we're not building firmware +# (sysdata-components commented out in debian/rules) # pc-bios/*.dts => *.dtb (PPC firmware) device-tree-compiler, - gcc-s390x-linux-gnu, +# gcc-s390x-linux-gnu, # qemu-palcode/palcode-clipper - gcc-alpha-linux-gnu, +# gcc-alpha-linux-gnu, # u-boot code - gcc-powerpc-linux-gnu, bc, +# gcc-powerpc-linux-gnu, bc, # skiboot firmware, openbios - gcc-powerpc64-linux-gnu, +# gcc-powerpc64-linux-gnu, # skiboot includes - libssl-dev, +# libssl-dev, # openbios - gcc-sparc64-linux-gnu, fcode-utils, xsltproc, +# gcc-sparc64-linux-gnu, fcode-utils, xsltproc, # hppa-firmware - gcc-hppa-linux-gnu, +# gcc-hppa-linux-gnu, # opensbi - gcc-riscv64-linux-gnu, +# gcc-riscv64-linux-gnu, # vbootrom/npcm7xx_bootrom - gcc-arm-none-eabi, +# gcc-arm-none-eabi, Build-Conflicts: oss4-dev Standards-Version: 4.6.1 Homepage: http://www.qemu.org/ diff --git a/debian/control-in b/debian/control-in index e4866159b7..79828861b1 100644 --- a/debian/control-in +++ b/debian/control-in @@ -7,7 +7,7 @@ Priority: optional Uploaders: Michael Tokarev 
Build-Depends: debhelper-compat (= 13), python3:any, - meson (>> 0.63.0~), ninja-build, + meson (>> 0.63.0~) | meson-1.5, ninja-build, flex, bison, Build-Depends-Arch: # In comments below we also specify (system-specific) arguments @@ -131,25 +131,27 @@ Build-Depends-Arch: # various firmware files (kvmvapic.bin &Co), older qemu-system-data should work #XXX-cyclic-test-dep-dak-bug qemu-system-data [:system-arch-linux:] , Build-Depends-Indep: +# Firmware build dependencies disabled since we're not building firmware +# (sysdata-components commented out in debian/rules) # pc-bios/*.dts => *.dtb (PPC firmware) device-tree-compiler, - gcc-s390x-linux-gnu, +# gcc-s390x-linux-gnu, # qemu-palcode/palcode-clipper - gcc-alpha-linux-gnu, +# gcc-alpha-linux-gnu, # u-boot code - gcc-powerpc-linux-gnu, bc, +# gcc-powerpc-linux-gnu, bc, # skiboot firmware, openbios - gcc-powerpc64-linux-gnu, +# gcc-powerpc64-linux-gnu, # skiboot includes - libssl-dev, +# libssl-dev, # openbios - gcc-sparc64-linux-gnu, fcode-utils, xsltproc, +# gcc-sparc64-linux-gnu, fcode-utils, xsltproc, # hppa-firmware - gcc-hppa-linux-gnu, +# gcc-hppa-linux-gnu, # opensbi - gcc-riscv64-linux-gnu, +# gcc-riscv64-linux-gnu, # vbootrom/npcm7xx_bootrom - gcc-arm-none-eabi, +# gcc-arm-none-eabi, Build-Conflicts: oss4-dev Standards-Version: 4.6.1 Homepage: http://www.qemu.org/ diff --git a/debian/qemu-system-common.install b/debian/qemu-system-common.install index 4755392bfa..a4a90b57df 100644 --- a/debian/qemu-system-common.install +++ b/debian/qemu-system-common.install @@ -11,11 +11,13 @@ usr/share/doc/qemu/system usr/share/doc/qemu-system-common # linux-specific usr/lib/qemu/qemu-bridge-helper -usr/lib/qemu/virtfs-proxy-helper -usr/share/man/man1/virtfs-proxy-helper.1 +# virtfs-proxy-helper removed in QEMU 9.0+ (replaced by vhost-user-fs) +# usr/lib/qemu/virtfs-proxy-helper +# usr/share/man/man1/virtfs-proxy-helper.1 # common modules. 
Other gui modules are in qemu-system-gui -usr/lib/${DEB_HOST_MULTIARCH}/qemu/accel-tcg-*.so +# accel-tcg-*.so modules no longer built as separate .so files in QEMU 10.1+ +# usr/lib/${DEB_HOST_MULTIARCH}/qemu/accel-tcg-*.so usr/lib/${DEB_HOST_MULTIARCH}/qemu/audio-alsa.so usr/lib/${DEB_HOST_MULTIARCH}/qemu/audio-oss.so usr/lib/${DEB_HOST_MULTIARCH}/qemu/chardev-baum.so @@ -26,5 +28,6 @@ usr/lib/${DEB_HOST_MULTIARCH}/qemu/hw-usb-host.so usr/lib/${DEB_HOST_MULTIARCH}/qemu/hw-usb-redirect.so usr/lib/${DEB_HOST_MULTIARCH}/qemu/hw-usb-smartcard.so usr/lib/${DEB_HOST_MULTIARCH}/qemu/hw-s390x-virtio-gpu-ccw.so +usr/lib/${DEB_HOST_MULTIARCH}/qemu/hw-uefi-vars.so usr/lib/${DEB_HOST_MULTIARCH}/qemu/ui-curses.so debian/qemu-kvm-init /usr/share/qemu/init diff --git a/debian/rules b/debian/rules index 56f07e9b5b..b8eabee0ff 100755 --- a/debian/rules +++ b/debian/rules @@ -108,11 +108,8 @@ common_configure_opts = \ # but is -k flag useful these days? common_configure_opts += --disable-xkbcommon -# pvrdma is an extension/optimisation for vmxnet3 vmware virtual network -# adapter. This piece of code seems to be buggy and poorly maintained, -# resulting in numerous security issues which comes unfixed for long time. -# This device isn't native for qemu. # Just disable it for now. 
-common_configure_opts += --disable-pvrdma +# pvrdma was removed in QEMU 9.1, no longer needs to be disabled +# (it was an extension for vmxnet3 vmware virtual network adapter) # Cross compiling support ifneq ($(DEB_BUILD_GNU_TYPE), $(DEB_HOST_GNU_TYPE)) @@ -162,10 +159,11 @@ system-kvmcpus-x86 = amd64 i386 system-kvmlink-amd64 = x86_64 system-kvmlink-i386 = x86_64 -system-archlist-misc = alpha avr cris hppa m68k loongarch64 \ - microblaze microblazeel nios2 or1k riscv32 riscv64 rx sh4 sh4eb \ +system-archlist-misc = alpha avr hppa m68k loongarch64 \ + microblaze microblazeel or1k riscv32 riscv64 rx sh4 sh4eb \ $(if ${system-s390x},,s390x) \ tricore xtensa xtensaeb +# Note: cris and nios2 removed in QEMU 9.1+ - system emulation targets no longer exist system-alias-loongarch64 = loong64 system-kvmcpus-misc = $(if ${system-s390x},,s390x) @@ -381,10 +379,11 @@ qemu-builds += $(if $(filter qemu-system-xen,${BUILD_PACKAGES}),xen) ############################## # list of linux-user targets, from configs/targets/*-linux-user.mak +# Note: cris and nios2 removed in QEMU 9.1+ user-targets = \ - aarch64 aarch64_be alpha arm armeb cris hexagon hppa i386 loongarch64 \ + aarch64 aarch64_be alpha arm armeb hexagon hppa i386 loongarch64 \ m68k microblaze microblazeel mips mips64 mips64el mipsel mipsn32 mipsn32el \ - nios2 or1k ppc ppc64 ppc64le riscv32 riscv64 \ + or1k ppc ppc64 ppc64le riscv32 riscv64 \ s390x sh4 sh4eb sparc sparc32plus sparc64 \ x86_64 xtensa xtensaeb # aliases for missing ${DEB_HOST_ARCH} names in qemu-user: @@ -572,7 +571,7 @@ install-openbios: build-openbios b/openbios/obj-sparc32/QEMU,tcx.bin \ b/openbios/obj-sparc32/QEMU,cgthree.bin \ b/openbios/obj-sparc64/QEMU,VGA.bin -sysdata-components += openbios +# sysdata-components += openbios ### powernv firmware in roms/skiboot build-skiboot: b/skiboot/skiboot.lid @@ -588,7 +587,7 @@ b/skiboot/skiboot.lid: | roms/skiboot/.version CROSS_COMPILE=${PPC64_CROSSPFX} V=${V} install-skiboot: b/skiboot/skiboot.lid install 
-m 0644 -t ${sysdataidir} $< -sysdata-components += skiboot +# sysdata-components += skiboot build-vof: b/vof/vof.bin b/vof/vof.bin: | b @@ -597,7 +596,7 @@ b/vof/vof.bin: | b ${MAKE} -C b/vof CROSS=${PPC64_CROSSPFX} SRC_DIR=../../pc-bios/vof -f../../pc-bios/vof/Makefile install-vof: b/vof/vof.bin install -m 0644 -t ${sysdataidir} $< -sysdata-components += vof +# sysdata-components += vof ### u-boot-e500 (u-boot.e500) build-u-boot-e500: b/u-boot/build-e500/u-boot @@ -608,7 +607,7 @@ b/u-boot/build-e500/u-boot: | b ${PPC_CROSSPFX}strip $@ install-u-boot-e500: b/u-boot/build-e500/u-boot install -m 0644 $< ${sysdataidir}/u-boot.e500 -sysdata-components += u-boot-e500 +# sysdata-components += u-boot-e500 ### u-boot-sam460 (u-boot-sam460-20100605.bin) build-u-boot-sam460: b/u-boot-sam460ex/u-boot.bin @@ -620,7 +619,7 @@ b/u-boot-sam460ex/u-boot.bin: | b # ${PPC_CROSSPFX}strip $@ install-u-boot-sam460: b/u-boot-sam460ex/u-boot.bin | ${sysdataidir} install -m 0644 $< ${sysdataidir}/u-boot-sam460-20100605.bin -sysdata-components += u-boot-sam460 +# sysdata-components += u-boot-sam460 ### x86 optionrom build-x86-optionrom: b/optionrom/built @@ -630,7 +629,7 @@ b/optionrom/built: touch $@ install-x86-optionrom: build-x86-optionrom | ${sysdataidir} ${MAKE} -f ${CURDIR}/debian/optionrom.mak -C b/optionrom SRC_PATH="${CURDIR}" install DESTDIR="${CURDIR}/${sysdataidir}" -sysdata-components += x86-optionrom +# sysdata-components += x86-optionrom ### qboot, aka bios-microvm build-qboot: b/qboot/bios.bin @@ -640,7 +639,7 @@ b/qboot/bios.bin: | b ninja -C b/qboot $(if $V,-v) install-qboot: b/qboot/bios.bin install -m 0644 $< ${sysdataidir}/qboot.rom -sysdata-components += qboot +# sysdata-components += qboot ### alpha firmware in roms/palcode-clipper build-palcode-clipper: b/qemu-palcode/palcode-clipper @@ -652,7 +651,7 @@ b/qemu-palcode/palcode-clipper: | b ${ALPHAEV67_CROSSPFX}strip b/qemu-palcode/palcode-clipper install-palcode-clipper: b/qemu-palcode/palcode-clipper install -m 
0644 $< ${sysdataidir}/palcode-clipper -sysdata-components += palcode-clipper +# sysdata-components += palcode-clipper ### SLOF build-slof: b/SLOF/boot_rom.bin @@ -661,7 +660,7 @@ b/SLOF/boot_rom.bin: | b env -u LDFLAGS -u CFLAGS $(MAKE) -C b/SLOF qemu CROSS=${PPC64_CROSSPFX} V=${V} install-slof: b/SLOF/boot_rom.bin install -m 0644 $< ${sysdataidir}/slof.bin -sysdata-components += slof +# sysdata-components += slof ### s390x firmware in pc-bios/s390-ccw build-s390x-fw: b/s390fw/built @@ -671,7 +670,7 @@ b/s390fw/built: touch $@ install-s390x-fw: build-s390x-fw install -m 0644 -t ${sysdataidir} b/s390fw/s390*.img -sysdata-components += s390x-fw +# sysdata-components += s390x-fw ### hppa-firmware (roms/seabios-hppa) build-hppa-fw: b/hppafw/hppa-firmware.img @@ -683,7 +682,7 @@ b/hppafw/hppa-firmware.img: hppa-linux-gnu-strip -R.note -R.comment $@ install-hppa-fw: b/hppafw/hppa-firmware.img install -m 0644 $< ${sysdataidir} -sysdata-components += hppa-fw +# sysdata-components += hppa-fw ### opensbi (riscv firmware) # we only build v64 variants, not v32 @@ -696,7 +695,7 @@ b/opensbi/.built: install-opensbi: build-opensbi install -m 0644 b/opensbi/platform/generic/firmware/fw_dynamic.bin ${sysdataidir}/opensbi-riscv64-generic-fw_dynamic.bin install -m 0644 b/opensbi/platform/generic/firmware/fw_dynamic.elf ${sysdataidir}/opensbi-riscv64-generic-fw_dynamic.elf -sysdata-components += opensbi +# sysdata-components += opensbi ### vbootrom (npcm7xx) build-vbootrom: b/vbootrom/.built @@ -706,7 +705,7 @@ b/vbootrom/.built: | b touch $@ install-vbootrom: build-vbootrom install -m 0644 b/vbootrom/npcm7xx_bootrom.bin ${sysdataidir}/ -sysdata-components += vbootrom +# sysdata-components += vbootrom ### misc firmware build-misc: b/misc/.built @@ -732,7 +731,7 @@ install-misc: build-misc debian/qemu-system-data/usr/share/icons/hicolor/32x32/apps/qemu.bmp install -Dp -m0644 -t debian/qemu-system-data/usr/share/qemu/keymaps/ \ $$(ls -1 pc-bios/keymaps/* | fgrep -v /meson.build) 
-sysdata-components += misc +# sysdata-components += misc ${sysdataidir}: mkdir -p -m 0755 $@ From 05c03e71ef334fe75d816ec6516400aa5856393d Mon Sep 17 00:00:00 2001 From: "Matthew R. Ochs" Date: Tue, 7 Oct 2025 15:30:06 -0700 Subject: [PATCH 02/35] NVIDIA: SAUCE: Indicate this is the -unstable branch of NVIDIA QEMU Add -unstable to version string. Signed-off-by: Matthew R. Ochs --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 5142e32d18..c8134623ce 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -qemu (1:10.1.0+nvidia1) noble; urgency=medium +qemu (1:10.1.0+nvidia1-unstable) noble; urgency=medium * Update to QEMU 10.1.0 upstream with NVIDIA support From ec4fb6837cdf6c337520119730de0edfd3eb6ddc Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 13 May 2022 09:08:54 +0100 Subject: [PATCH 03/35] NVIDIA: SAUCE: NOMERGE: Add KVM Arm RME definitions to Linux headers Copy the KVM definitions for Arm RME from the development branch. Don't merge, they will be added from the periodic Linux header sync. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 963d576c9bfe96c0a31b0313f47dc62fc993e92d https://git.codelinaro.org/linaro/dcap/qemu.git) [ianm: context adjustment for kpset 6.16 commit 052233c47d66f0 - value change of KVM_CAP_ARM_RME commit] Signed-off-by: Ian May --- linux-headers/asm-arm64/kvm.h | 49 +++++++++++++++++++++++++++++++++++ linux-headers/linux/kvm.h | 29 ++++++++++++++++++--- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index f4d9baafa1..91ab0f2763 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -106,6 +106,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ #define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */ +#define KVM_ARM_VCPU_REC 9 /* VCPU REC state as part of Realm */ struct kvm_vcpu_init { __u32 target; @@ -417,6 +418,54 @@ enum { #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 #define KVM_DEV_ARM_ITS_CTRL_RESET 4 +/* KVM_CAP_ARM_RME on VM fd */ +#define KVM_CAP_ARM_RME_CONFIG_REALM 0 +#define KVM_CAP_ARM_RME_CREATE_REALM 1 +#define KVM_CAP_ARM_RME_INIT_RIPAS_REALM 2 +#define KVM_CAP_ARM_RME_POPULATE_REALM 3 +#define KVM_CAP_ARM_RME_ACTIVATE_REALM 4 + +/* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ +#define ARM_RME_CONFIG_RPV 0 +#define ARM_RME_CONFIG_HASH_ALGO 1 + +#define ARM_RME_CONFIG_HASH_ALGO_SHA256 0 +#define ARM_RME_CONFIG_HASH_ALGO_SHA512 1 + +#define ARM_RME_CONFIG_RPV_SIZE 64 + +struct arm_rme_config { + __u32 cfg; + union { + /* cfg == ARM_RME_CONFIG_RPV */ + struct { + __u8 rpv[ARM_RME_CONFIG_RPV_SIZE]; + }; + + /* cfg == ARM_RME_CONFIG_HASH_ALGO */ + struct { + __u32 hash_algo; + }; + + /* Fix the size of the union */ + __u8 reserved[256]; + }; +}; + +#define KVM_ARM_RME_POPULATE_FLAGS_MEASURE (1 << 0) +struct arm_rme_populate_realm { + __u64 
base; + __u64 size; + __u32 flags; + __u32 reserved[3]; +}; + +struct arm_rme_init_ripas { + __u64 base; + __u64 size; + __u64 reserved[2]; +}; + /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 32c5885a3c..81ff244d5f 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -663,14 +663,25 @@ struct kvm_enable_cap { #define KVM_S390_SIE_PAGE_OFFSET 1 /* - * On arm64, machine type can be used to request the physical - * address size for the VM. Bits[7-0] are reserved for the guest - * PA size shift (i.e, log2(PA_Size)). For backward compatibility, - * value 0 implies the default IPA size, 40bits. + * On arm64, machine type can be used to request both the machine type and + * the physical address size for the VM. + * + * Bits[11-8] are reserved for the ARM specific machine type. + * + * Bits[7-0] are reserved for the guest PA size shift (i.e, log2(PA_Size)). + * For backward compatibility, value 0 implies the default IPA size, 40bits. 
*/ +#define KVM_VM_TYPE_ARM_SHIFT 8 +#define KVM_VM_TYPE_ARM_MASK (0xfULL << KVM_VM_TYPE_ARM_SHIFT) +#define KVM_VM_TYPE_ARM(_type) \ + (((_type) << KVM_VM_TYPE_ARM_SHIFT) & KVM_VM_TYPE_ARM_MASK) +#define KVM_VM_TYPE_ARM_NORMAL KVM_VM_TYPE_ARM(0) +#define KVM_VM_TYPE_ARM_REALM KVM_VM_TYPE_ARM(1) + #define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL #define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) + /* * ioctls for /dev/kvm fds: */ @@ -952,6 +963,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2 240 #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 +#define KVM_CAP_ARM_RME 243 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1604,4 +1616,13 @@ struct kvm_pre_fault_memory { __u64 padding[5]; }; +/* Available with KVM_CAP_ARM_RME, only for VMs with KVM_VM_TYPE_ARM_REALM */ +struct kvm_arm_rmm_psci_complete { + __u64 target_mpidr; + __u32 psci_status; + __u32 padding[3]; +}; + +#define KVM_ARM_VCPU_RMM_PSCI_COMPLETE _IOW(KVMIO, 0xd6, struct kvm_arm_rmm_psci_complete) + #endif /* __LINUX_KVM_H */ From 6e91ce1fc8a9b09ce04882de4f5cf8046a8fb3ec Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 4 Dec 2024 15:34:28 +0000 Subject: [PATCH 04/35] NVIDIA: SAUCE: kvm: Use kvm_vm_check_extension() where necessary The Arm KVM code can return different values from KVM_CHECK_EXTENSION depending on the VM type. Use kvm_vm_check_extension() where necessary to ensure we get the right response from KVM. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 8fe082b9c572e541d2bd07aa82f4f6684948b913 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- accel/kvm/kvm-all.c | 4 ++-- target/arm/kvm.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 890d5ea9f8..f67814f0cd 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2479,7 +2479,7 @@ static int kvm_max_vcpus(KVMState *s) static int kvm_max_vcpu_id(KVMState *s) { - int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPU_ID); + int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPU_ID); return (ret) ? ret : kvm_max_vcpus(s); } @@ -2750,7 +2750,7 @@ static int kvm_init(AccelState *as, MachineState *ms) #ifdef TARGET_KVM_HAVE_GUEST_DEBUG kvm_has_guest_debug = - (kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0); + (kvm_vm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0); #endif kvm_sstep_flags = 0; diff --git a/target/arm/kvm.c b/target/arm/kvm.c index c78d0d59bb..77c171054d 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -586,11 +586,11 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } - max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS); + max_hw_wps = kvm_vm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS); hw_watchpoints = g_array_sized_new(true, true, sizeof(HWWatchpoint), max_hw_wps); - max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS); + max_hw_bps = kvm_vm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS); hw_breakpoints = g_array_sized_new(true, true, sizeof(HWBreakpoint), max_hw_bps); @@ -1743,7 +1743,7 @@ void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa) void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) { - bool has_steal_time = kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME); + bool has_steal_time = kvm_vm_check_extension(kvm_state, KVM_CAP_STEAL_TIME); if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) { if (!has_steal_time || !arm_feature(&cpu->env, 
ARM_FEATURE_AARCH64)) { @@ -1783,7 +1783,7 @@ bool kvm_arm_el2_supported(void) bool kvm_arm_sve_supported(void) { - return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); + return kvm_vm_check_extension(kvm_state, KVM_CAP_ARM_SVE); } bool kvm_arm_mte_supported(void) From a9ad578d0ec8357cb6c1d484b48b99eb34d55239 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 16 Jun 2022 18:24:55 +0100 Subject: [PATCH 05/35] NVIDIA: SAUCE: target/arm: Add confidential guest support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new RmeGuest object, inheriting from ConfidentialGuestSupport, to support the Arm Realm Management Extension (RME). It is instantiated by passing on the command-line: -M virt,confidential-guest-support= -object rme-guest,id=[,options...] This is only the skeleton. Support will be added in following patches. Cc: Eric Blake Cc: Markus Armbruster Cc: Daniel P. Berrangé Cc: Eduardo Habkost Acked-by: Markus Armbruster Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 5ecb9f465e30870eb44937851a57ac95e345eba6 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- docs/system/confidential-guest-support.rst | 1 + qapi/qom.json | 1 + target/arm/kvm-rme.c | 40 ++++++++++++++++++++++ target/arm/meson.build | 6 +++- 4 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 target/arm/kvm-rme.c diff --git a/docs/system/confidential-guest-support.rst b/docs/system/confidential-guest-support.rst index 66129fbab6..d97d8cac92 100644 --- a/docs/system/confidential-guest-support.rst +++ b/docs/system/confidential-guest-support.rst @@ -41,5 +41,6 @@ Currently supported confidential guest mechanisms are: * Intel Trust Domain Extension (TDX) (see :doc:`i386/tdx`) * POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`) * s390x Protected Virtualization (PV) (see 
:doc:`s390x/protvirt`) +* Arm Realm Management Extension (RME) Other mechanisms may be supported in future. diff --git a/qapi/qom.json b/qapi/qom.json index 6f619f92bf..42de1e489b 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -1240,6 +1240,7 @@ { 'name': 'pr-manager-helper', 'if': 'CONFIG_LINUX' }, 'qtest', + 'rme-guest', 'rng-builtin', 'rng-egd', { 'name': 'rng-random', diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c new file mode 100644 index 0000000000..a6f06f3aaf --- /dev/null +++ b/target/arm/kvm-rme.c @@ -0,0 +1,40 @@ +/* + * QEMU Arm RME support + * + * Copyright Linaro 2024 + */ + +#include "qemu/osdep.h" + +#include "hw/boards.h" +#include "hw/core/cpu.h" +#include "kvm_arm.h" +#include "migration/blocker.h" +#include "qapi/error.h" +#include "qom/object_interfaces.h" +#include "system/confidential-guest-support.h" +#include "system/kvm.h" +#include "system/runstate.h" + +#define TYPE_RME_GUEST "rme-guest" +OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST) + +struct RmeGuest { + ConfidentialGuestSupport parent_obj; +}; + +OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, + CONFIDENTIAL_GUEST_SUPPORT, + { TYPE_USER_CREATABLE }, { }) + +static void rme_guest_class_init(ObjectClass *oc, const void *data) +{ +} + +static void rme_guest_init(Object *obj) +{ +} + +static void rme_guest_finalize(Object *obj) +{ +} diff --git a/target/arm/meson.build b/target/arm/meson.build index 07d9271aa4..6eab1c0385 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -13,7 +13,11 @@ arm_common_system_ss = ss.source_set() arm_system_ss.add(files( 'arm-qmp-cmds.c', )) -arm_system_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c')) +arm_system_ss.add(when: 'CONFIG_KVM', + if_true: files( + 'hyp_gdbstub.c', + 'kvm.c', + 'kvm-rme.c')) arm_system_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c')) arm_user_ss = ss.source_set() From 637c6ce5316257f7323a2fa192e611d0560bf4f2 Mon Sep 17 00:00:00 2001 From: 
Jean-Philippe Brucker Date: Wed, 21 Feb 2024 15:50:42 +0000 Subject: [PATCH 06/35] NVIDIA: SAUCE: target/arm/kvm: Return immediately on error in kvm_arch_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning an error to kvm_init() is fatal anyway, no need to continue the initialization. Leave the `ret` variable in the function scope because it will be reused when adding RME support. Signed-off-by: Jean-Philippe Brucker Reviewed-by: Philippe Mathieu-Daudé (cherry picked from commit c9c713179a95192a9212f06f693f1e34af9822cc https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- target/arm/kvm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 77c171054d..8ac5c6f2d0 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -531,7 +531,7 @@ int kvm_arch_get_default_type(MachineState *ms) int kvm_arch_init(MachineState *ms, KVMState *s) { - int ret = 0; + int ret; /* For ARM interrupt delivery is always asynchronous, * whether we are using an in-kernel VGIC or not. 
*/ @@ -553,7 +553,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) { error_report("Using more than 256 vcpus requires a host kernel " "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2"); - ret = -EINVAL; + return -EINVAL; } if (kvm_check_extension(s, KVM_CAP_ARM_NISV_TO_USER)) { @@ -575,13 +575,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s) warn_report("Eager Page Split support not available"); } else if (!(s->kvm_eager_split_size & sizes)) { error_report("Eager Page Split requested chunk size not valid"); - ret = -EINVAL; + return -EINVAL; } else { ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0, s->kvm_eager_split_size); if (ret < 0) { error_report("Enabling of Eager Page Split failed: %s", strerror(-ret)); + return ret; } } } @@ -594,7 +595,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) hw_breakpoints = g_array_sized_new(true, true, sizeof(HWBreakpoint), max_hw_bps); - return ret; + return 0; } unsigned long kvm_arch_vcpu_id(CPUState *cpu) From f393859c0fa64c4c5fca81609b8040700f9edfff Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 9 Jan 2023 10:45:27 +0000 Subject: [PATCH 07/35] NVIDIA: SAUCE: target/arm/kvm-rme: Initialize realm The machine code calls kvm_arm_rme_vm_type() to get the VM flag and KVM calls kvm_arm_rme_init() to prepare for launching a Realm. Once VM creation is complete, create the Realm: * Create the realm descriptor, * load images into Realm RAM (in another patch), * finalize the REC (vCPU) after the registers are reset, * activate the realm, at which point the realm is sealed. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit bc8e78745236a1dfe5a930d1a63ca977baeb9afb https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- target/arm/kvm-rme.c | 106 ++++++++++++++++++++++++++++++++++++++++++ target/arm/kvm-stub.c | 15 ++++++ target/arm/kvm.c | 9 +++- target/arm/kvm_arm.h | 24 ++++++++++ 4 files changed, 152 insertions(+), 2 deletions(-) diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c index a6f06f3aaf..b447be0624 100644 --- a/target/arm/kvm-rme.c +++ b/target/arm/kvm-rme.c @@ -11,6 +11,7 @@ #include "kvm_arm.h" #include "migration/blocker.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include "qom/object_interfaces.h" #include "system/confidential-guest-support.h" #include "system/kvm.h" @@ -27,14 +28,119 @@ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, CONFIDENTIAL_GUEST_SUPPORT, { TYPE_USER_CREATABLE }, { }) +static RmeGuest *rme_guest; + +static int rme_init_cpus(Error **errp) +{ + int ret; + CPUState *cs; + + /* + * Now that do_cpu_reset() initialized the boot PC and + * kvm_cpu_synchronize_post_reset() registered it, we can finalize the REC. 
+ */ + CPU_FOREACH(cs) { + ret = kvm_arm_vcpu_finalize(ARM_CPU(cs), KVM_ARM_VCPU_REC); + if (ret) { + error_setg_errno(errp, -ret, "failed to finalize vCPU"); + return ret; + } + } + return 0; +} + +static int rme_create_realm(Error **errp) +{ + int ret; + + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_RME_CREATE_REALM); + if (ret) { + error_setg_errno(errp, -ret, "failed to create Realm Descriptor"); + return -1; + } + + if (rme_init_cpus(errp)) { + return -1; + } + + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_RME_ACTIVATE_REALM); + if (ret) { + error_setg_errno(errp, -ret, "failed to activate realm"); + return -1; + } + + kvm_mark_guest_state_protected(); + return 0; +} + +static void rme_vm_state_change(void *opaque, bool running, RunState state) +{ + Error *err = NULL; + + if (!running) { + return; + } + + if (rme_create_realm(&err)) { + error_propagate_prepend(&error_fatal, err, "RME: "); + } +} + static void rme_guest_class_init(ObjectClass *oc, const void *data) { } static void rme_guest_init(Object *obj) { + if (rme_guest) { + error_report("a single instance of RmeGuest is supported"); + exit(1); + } + rme_guest = RME_GUEST(obj); } static void rme_guest_finalize(Object *obj) { } + +int kvm_arm_rme_init(MachineState *ms) +{ + static Error *rme_mig_blocker; + ConfidentialGuestSupport *cgs = ms->cgs; + + if (!rme_guest) { + return 0; + } + + if (!cgs) { + error_report("missing -machine confidential-guest-support parameter"); + return -EINVAL; + } + + if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_RME)) { + return -ENODEV; + } + + error_setg(&rme_mig_blocker, "RME: migration is not implemented"); + migrate_add_blocker(&rme_mig_blocker, &error_fatal); + + /* + * The realm activation is done last, when the VM starts, after all images + * have been loaded and all vcpus finalized. 
+ */ + qemu_add_vm_change_state_handler(rme_vm_state_change, NULL); + + cgs->require_guest_memfd = true; + cgs->ready = true; + return 0; +} + +int kvm_arm_rme_vm_type(MachineState *ms) +{ + if (rme_guest) { + return KVM_VM_TYPE_ARM_REALM; + } + return 0; +} diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c index c93462c5b9..e828bd4908 100644 --- a/target/arm/kvm-stub.c +++ b/target/arm/kvm-stub.c @@ -65,6 +65,11 @@ void kvm_arm_add_vcpu_properties(ARMCPU *cpu) g_assert_not_reached(); } +int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature) +{ + g_assert_not_reached(); +} + int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) { g_assert_not_reached(); @@ -124,3 +129,13 @@ bool kvm_arm_cpu_post_load(ARMCPU *cpu) { g_assert_not_reached(); } + +int kvm_arm_rme_init(MachineState *ms) +{ + g_assert_not_reached(); +} + +int kvm_arm_rme_vm_type(MachineState *ms) +{ + g_assert_not_reached(); +} diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 8ac5c6f2d0..c9ffcbde4b 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -96,7 +96,7 @@ static int kvm_arm_vcpu_init(ARMCPU *cpu) * * Returns: 0 if success else < 0 error code */ -static int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature) +int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature) { return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_FINALIZE, &feature); } @@ -595,7 +595,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) hw_breakpoints = g_array_sized_new(true, true, sizeof(HWBreakpoint), max_hw_bps); - return 0; + ret = kvm_arm_rme_init(ms); + if (ret) { + error_report("Failed to enable RME: %s", strerror(-ret)); + } + + return ret; } unsigned long kvm_arch_vcpu_id(CPUState *cpu) diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 6a9b6374a6..38c81cc3b3 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -151,6 +151,14 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); */ void kvm_arm_add_vcpu_properties(ARMCPU *cpu); +/** + * @cpu: The CPU object to finalize + * 
@feature: a KVM_ARM_VCPU_* feature + * + * Finalize the configuration of the given vcpu feature. + */ +int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature); + /** * kvm_arm_steal_time_finalize: * @cpu: ARMCPU for which to finalize kvm-steal-time @@ -263,4 +271,20 @@ void kvm_arm_enable_mte(Object *cpuobj, Error **errp); void arm_cpu_kvm_set_irq(void *arm_cpu, int irq, int level); +/** + * kvm_arm_rme_init + * @ms: the machine state + * + * Prepare the machine to be a Realm, if the user enabled it. + */ +int kvm_arm_rme_init(MachineState *ms); + +/** + * kvm_arm_rme_vm_type + * @ms: the machine state + * + * Returns the Realm KVM VM type if the user requested a Realm, 0 otherwise. + */ +int kvm_arm_rme_vm_type(MachineState *ms); + #endif From 69877fb29d791974f90631501473076ea720f54a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 21 Jun 2022 11:52:14 +0100 Subject: [PATCH 08/35] NVIDIA: SAUCE: target/arm/kvm: Split kvm_arch_get/put_registers The confidential guest support in KVM limits the number of registers that we can read and write. Split the get/put_registers function to prepare for it. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 845397aa4e9727845226a13b21eebc5462cabb8e https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- target/arm/kvm.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/target/arm/kvm.c b/target/arm/kvm.c index c9ffcbde4b..41d692dd01 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -2053,7 +2053,7 @@ static int kvm_arch_put_sve(CPUState *cs) return 0; } -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +static int kvm_arm_put_core_regs(CPUState *cs, int level, Error **errp) { uint64_t val; uint32_t fpr; @@ -2156,6 +2156,19 @@ int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) return ret; } + return 0; +} + +int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +{ + int ret; + ARMCPU *cpu = ARM_CPU(cs); + + ret = kvm_arm_put_core_regs(cs, level, errp); + if (ret) { + return ret; + } + write_cpustate_to_list(cpu, true); if (!write_list_to_kvmstate(cpu, level)) { @@ -2237,7 +2250,7 @@ static int kvm_arch_get_sve(CPUState *cs) return 0; } -int kvm_arch_get_registers(CPUState *cs, Error **errp) +static int kvm_arm_get_core_regs(CPUState *cs, Error **errp) { uint64_t val; unsigned int el; @@ -2340,6 +2353,19 @@ int kvm_arch_get_registers(CPUState *cs, Error **errp) } vfp_set_fpcr(env, fpr); + return 0; +} + +int kvm_arch_get_registers(CPUState *cs, Error **errp) +{ + int ret; + ARMCPU *cpu = ARM_CPU(cs); + + ret = kvm_arm_get_core_regs(cs, errp); + if (ret) { + return ret; + } + ret = kvm_get_vcpu_events(cpu); if (ret) { return ret; From e6137dbca3aeeea020fcf70be6c700d39fe472ef Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 9 Jan 2023 10:55:32 +0000 Subject: [PATCH 09/35] NVIDIA: SAUCE: target/arm/kvm-rme: Initialize vCPU The target code calls kvm_arm_vcpu_init() to mark the vCPU as part of a Realm. For a Realm vCPU, only x0-x7 can be set at runtime. 
Before boot, the PC can also be set, and is ignored at runtime. KVM also accepts a few system register changes during initial configuration, as returned by KVM_GET_REG_LIST. Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 0c18ee1e332788d1935c587585bd78dbda2d3fee https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- target/arm/cpu.h | 3 +++ target/arm/kvm-rme.c | 11 +++++++++ target/arm/kvm-stub.c | 5 ++++ target/arm/kvm.c | 53 +++++++++++++++++++++++++++++++++++++++++++ target/arm/kvm_arm.h | 10 ++++++++ 5 files changed, 82 insertions(+) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index dc9b6dce4c..3abd921d52 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1029,6 +1029,9 @@ struct ArchCPU { /* KVM steal time */ OnOffAuto kvm_steal_time; + /* Realm Management Extension */ + bool kvm_rme; + /* Uniprocessor system with MP extensions */ bool mp_is_up; diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c index b447be0624..921b1aa8ce 100644 --- a/target/arm/kvm-rme.c +++ b/target/arm/kvm-rme.c @@ -137,6 +137,17 @@ int kvm_arm_rme_init(MachineState *ms) return 0; } +int kvm_arm_rme_vcpu_init(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + + if (rme_guest) { + cpu->kvm_rme = true; + cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_REC); + } + return 0; +} + int kvm_arm_rme_vm_type(MachineState *ms) { if (rme_guest) { diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c index e828bd4908..71dc5d404a 100644 --- a/target/arm/kvm-stub.c +++ b/target/arm/kvm-stub.c @@ -139,3 +139,8 @@ int kvm_arm_rme_vm_type(MachineState *ms) { g_assert_not_reached(); } + +int kvm_arm_rme_vcpu_init(CPUState *cs) +{ + g_assert_not_reached(); +} diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 41d692dd01..67fd60f1c9 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1911,6 +1911,11 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_HAS_EL2; } + ret = kvm_arm_rme_vcpu_init(cs); + if (ret) { 
+ return ret; + } + /* Do KVM_ARM_VCPU_INIT ioctl */ ret = kvm_arm_vcpu_init(cpu); if (ret) { @@ -2053,6 +2058,29 @@ static int kvm_arch_put_sve(CPUState *cs) return 0; } +static int kvm_arm_rme_put_core_regs(CPUState *cs, Error **errp) +{ + int i, ret; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + /* The RME ABI only allows us to set 8 GPRs and the PC */ + for (i = 0; i < 8; i++) { + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), + &env->xregs[i]); + if (ret) { + return ret; + } + } + + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); + if (ret) { + return ret; + } + + return 0; +} + static int kvm_arm_put_core_regs(CPUState *cs, int level, Error **errp) { uint64_t val; @@ -2063,6 +2091,10 @@ static int kvm_arm_put_core_regs(CPUState *cs, int level, Error **errp) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; + if (cpu->kvm_rme) { + return kvm_arm_rme_put_core_regs(cs, errp); + } + /* If we are in AArch32 mode then we need to copy the AArch32 regs to the * AArch64 registers before pushing them out to 64-bit KVM. 
*/ @@ -2250,6 +2282,23 @@ static int kvm_arch_get_sve(CPUState *cs) return 0; } +static int kvm_arm_rme_get_core_regs(CPUState *cs, Error **errp) +{ + int i, ret; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + for (i = 0; i < 8; i++) { + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), + &env->xregs[i]); + if (ret) { + return ret; + } + } + + return 0; +} + static int kvm_arm_get_core_regs(CPUState *cs, Error **errp) { uint64_t val; @@ -2260,6 +2309,10 @@ static int kvm_arm_get_core_regs(CPUState *cs, Error **errp) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; + if (cpu->kvm_rme) { + return kvm_arm_rme_get_core_regs(cs, errp); + } + for (i = 0; i < 31; i++) { ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), &env->xregs[i]); diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 38c81cc3b3..62d691888a 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -287,4 +287,14 @@ int kvm_arm_rme_init(MachineState *ms); */ int kvm_arm_rme_vm_type(MachineState *ms); +/** + * kvm_arm_rme_vcpu_init + * @cs: the CPU + * + * If the user requested a Realm, setup the given vCPU accordingly. Realm vCPUs + * behave a little differently, for example most of their register state is + * hidden from the host. + */ +int kvm_arm_rme_vcpu_init(CPUState *cs); + #endif From 25fd1683ca5a0b038ab5d911ed993b8729b37645 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 4 Dec 2023 18:48:36 +0000 Subject: [PATCH 10/35] NVIDIA: SAUCE: target/arm/kvm: Create scratch VM as Realm if necessary Some ID registers have a different value for a Realm VM, for example ID_AA64DFR0_EL1 contains the number of breakpoints/watchpoints implemented by RMM instead of the hardware. Even though RMM is in charge of setting up most Realm registers, KVM still provides GET_ONE_REG interface on a Realm VM to probe the VM's capabilities. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 35c8e7cb786f3701367fe29dd344c89931bb91d0 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- target/arm/kvm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 67fd60f1c9..14ecced4db 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -106,6 +106,7 @@ bool kvm_arm_create_scratch_host_vcpu(int *fdarray, { int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1; int max_vm_pa_size; + int vm_type; kvmfd = qemu_open_old("/dev/kvm", O_RDWR); if (kvmfd < 0) { @@ -115,8 +116,9 @@ bool kvm_arm_create_scratch_host_vcpu(int *fdarray, if (max_vm_pa_size < 0) { max_vm_pa_size = 0; } + vm_type = kvm_arm_rme_vm_type(MACHINE(qdev_get_machine())); do { - vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); + vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size | vm_type); } while (vmfd == -1 && errno == EINTR); if (vmfd < 0) { goto err; From 9d6dd3eb0832d5ffe60c2a3864ddb536db7f5ae3 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 13 Jun 2023 18:01:50 +0100 Subject: [PATCH 11/35] NVIDIA: SAUCE: hw/core/loader: Add ROM loader notifier Add a function to register a notifier, that is invoked after a ROM gets loaded into guest memory. It will be used by Arm confidential guest support, in order to register all blobs loaded into memory with KVM, so that their content is moved into Realm state and measured into the initial VM state. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 5cc4c5416bc95b856b46c65eb1de2587707a333c https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/core/loader.c | 14 ++++++++++++++ include/hw/loader.h | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/hw/core/loader.c b/hw/core/loader.c index e7056ba4bd..d4c2a2ec13 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -68,6 +68,8 @@ #include static int roms_loaded; +static NotifierList rom_loader_notifier = + NOTIFIER_LIST_INITIALIZER(rom_loader_notifier); /* return the size or -1 if error */ int64_t get_image_size(const char *filename) @@ -1160,6 +1162,11 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, return mr; } +void rom_add_load_notifier(Notifier *notifier) +{ + notifier_list_add(&rom_loader_notifier, notifier); +} + /* This function is specific for elf program because we don't need to allocate * all the rom. We just allocate the first part and the rest is just zeros. This * is why romsize and datasize are different. 
Also, this function takes its own @@ -1201,6 +1208,7 @@ ssize_t rom_add_option(const char *file, int32_t bootindex) static void rom_reset(void *unused) { Rom *rom; + RomLoaderNotifyData notify; QTAILQ_FOREACH(rom, &roms, next) { if (rom->fw_file) { @@ -1249,6 +1257,12 @@ static void rom_reset(void *unused) cpu_flush_icache_range(rom->addr, rom->datasize); trace_loader_write_rom(rom->name, rom->addr, rom->datasize, rom->isrom); + + notify = (RomLoaderNotifyData) { + .addr = rom->addr, + .len = rom->datasize, + }; + notifier_list_notify(&rom_loader_notifier, &notify); } } diff --git a/include/hw/loader.h b/include/hw/loader.h index c96b5e141c..1290419913 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -339,6 +339,21 @@ void *rom_ptr_for_as(AddressSpace *as, hwaddr addr, size_t size); ssize_t rom_add_vga(const char *file); ssize_t rom_add_option(const char *file, int32_t bootindex); +typedef struct RomLoaderNotifyData { + /* Address of the blob in guest memory */ + hwaddr addr; + /* Length of the blob */ + size_t len; +} RomLoaderNotifyData; + +/** + * rom_add_load_notifier - Add a notifier for loaded images + * + * Add a notifier that will be invoked with a RomLoaderNotifyData structure for + * each blob loaded into guest memory, after the blob is loaded. + */ +void rom_add_load_notifier(Notifier *notifier); + /* This is the usual maximum in uboot, so if a uImage overflows this, it would * overflow on real hardware too. */ #define UBOOT_MAX_GUNZIP_BYTES (64 << 20) From d005f2adf7191cee62af7160b7ad1bec79b483bb Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 14 Jun 2023 16:54:00 +0100 Subject: [PATCH 12/35] NVIDIA: SAUCE: target/arm/kvm-rme: Initialize Realm memory Initialize the IPA state of RAM. Collect the images copied into guest RAM into a sorted list, and issue POPULATE_REALM KVM ioctls once we've created the Realm Descriptor. The images are part of the Realm Initial Measurement. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 337decaf5ea55c00e7354ac01d450a63ae28bc0b https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- target/arm/kvm-rme.c | 127 ++++++++++++++++++++++++++++++++++++++++++ target/arm/kvm-stub.c | 4 ++ target/arm/kvm_arm.h | 10 ++++ 3 files changed, 141 insertions(+) diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c index 921b1aa8ce..0b30b3418d 100644 --- a/target/arm/kvm-rme.c +++ b/target/arm/kvm-rme.c @@ -8,6 +8,7 @@ #include "hw/boards.h" #include "hw/core/cpu.h" +#include "hw/loader.h" #include "kvm_arm.h" #include "migration/blocker.h" #include "qapi/error.h" @@ -20,8 +21,19 @@ #define TYPE_RME_GUEST "rme-guest" OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST) +#define RME_PAGE_SIZE qemu_real_host_page_size() + +typedef struct { + hwaddr base; + hwaddr size; +} RmeRamRegion; + struct RmeGuest { ConfidentialGuestSupport parent_obj; + Notifier rom_load_notifier; + GSList *ram_regions; + + RmeRamRegion init_ram; }; OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, @@ -30,6 +42,63 @@ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, static RmeGuest *rme_guest; +static int rme_init_ram(RmeRamRegion *ram, Error **errp) +{ + int ret; + hwaddr start = QEMU_ALIGN_DOWN(ram->base, RME_PAGE_SIZE); + hwaddr end = QEMU_ALIGN_UP(ram->base + ram->size, RME_PAGE_SIZE); + struct arm_rme_init_ripas init_args = { + .base = start, + .size = end - start, + }; + + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_RME_INIT_RIPAS_REALM, + (intptr_t)&init_args); + if (ret) { + error_setg_errno(errp, -ret, + "failed to init RAM [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")", + start, end); + } + + return ret; +} + +static int rme_populate_range(hwaddr base, size_t size, bool measure, + Error **errp) +{ + int ret; + hwaddr start = QEMU_ALIGN_DOWN(base, RME_PAGE_SIZE); + hwaddr end = QEMU_ALIGN_UP(base + size, RME_PAGE_SIZE); + struct 
arm_rme_populate_realm populate_args = { + .base = start, + .size = end - start, + .flags = measure ? KVM_ARM_RME_POPULATE_FLAGS_MEASURE : 0, + }; + + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_RME_POPULATE_REALM, + (intptr_t)&populate_args); + if (ret) { + error_setg_errno(errp, -ret, + "failed to populate realm [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")", + start, end); + } + return ret; +} + +static void rme_populate_ram_region(gpointer data, gpointer err) +{ + Error **errp = err; + const RmeRamRegion *region = data; + + if (*errp) { + return; + } + + rme_populate_range(region->base, region->size, /* measure */ true, errp); +} + static int rme_init_cpus(Error **errp) { int ret; @@ -60,6 +129,16 @@ static int rme_create_realm(Error **errp) return -1; } + if (rme_init_ram(&rme_guest->init_ram, errp)) { + return -1; + } + + g_slist_foreach(rme_guest->ram_regions, rme_populate_ram_region, errp); + g_slist_free_full(g_steal_pointer(&rme_guest->ram_regions), g_free); + if (*errp) { + return -1; + } + if (rme_init_cpus(errp)) { return -1; } @@ -105,6 +184,43 @@ static void rme_guest_finalize(Object *obj) { } +static gint rme_compare_ram_regions(gconstpointer a, gconstpointer b) +{ + const RmeRamRegion *ra = a; + const RmeRamRegion *rb = b; + + g_assert(ra->base != rb->base); + return ra->base < rb->base ? -1 : 1; +} + +static void rme_rom_load_notify(Notifier *notifier, void *data) +{ + RmeRamRegion *region; + RomLoaderNotifyData *rom = data; + + if (rom->addr == -1) { + /* + * These blobs (ACPI tables) are not loaded into guest RAM at reset. + * Instead the firmware will load them via fw_cfg and measure them + * itself. + */ + return; + } + + region = g_new0(RmeRamRegion, 1); + region->base = rom->addr; + region->size = rom->len; + + /* + * The Realm Initial Measurement (RIM) depends on the order in which we + * initialize and populate the RAM regions. To help a verifier + * independently calculate the RIM, sort regions by GPA. 
+ */ + rme_guest->ram_regions = g_slist_insert_sorted(rme_guest->ram_regions, + region, + rme_compare_ram_regions); +} + int kvm_arm_rme_init(MachineState *ms) { static Error *rme_mig_blocker; @@ -132,11 +248,22 @@ int kvm_arm_rme_init(MachineState *ms) */ qemu_add_vm_change_state_handler(rme_vm_state_change, NULL); + rme_guest->rom_load_notifier.notify = rme_rom_load_notify; + rom_add_load_notifier(&rme_guest->rom_load_notifier); + cgs->require_guest_memfd = true; cgs->ready = true; return 0; } +void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size) +{ + if (rme_guest) { + rme_guest->init_ram.base = base; + rme_guest->init_ram.size = size; + } +} + int kvm_arm_rme_vcpu_init(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c index 71dc5d404a..b9e7634c3b 100644 --- a/target/arm/kvm-stub.c +++ b/target/arm/kvm-stub.c @@ -52,6 +52,10 @@ bool kvm_arm_el2_supported(void) return false; } +void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size) +{ +} + /* * These functions should never actually be called without KVM support. */ diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 62d691888a..7ea652055f 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -297,4 +297,14 @@ int kvm_arm_rme_vm_type(MachineState *ms); */ int kvm_arm_rme_vcpu_init(CPUState *cs); +/* + * kvm_arm_rme_init_guest_ram + * @base: base address of RAM + * @size: size of RAM + * + * If the user requested a Realm, set the base and size of guest RAM, in order + * to initialize the Realm IPA space. 
+ */ +void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size); + #endif From 70b4635db277f99858aa74991a5466b07a2fef1c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 7 Feb 2023 18:55:22 +0000 Subject: [PATCH 13/35] NVIDIA: SAUCE: target/arm/kvm-rme: Add Realm Personalization Value parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Realm Personalization Value (RPV) is provided by the user to distinguish Realms that have the same initial measurement. The user provides a base64 string encoding 64 bytes. They are stored into the RPV in the same order. Cc: Eric Blake Cc: Markus Armbruster Cc: Daniel P. Berrangé Cc: Eduardo Habkost Acked-by: Markus Armbruster Signed-off-by: Jean-Philippe Brucker --- v3->v4: switch to base64 (cherry picked from commit 1138f32dea34c9c7360a658d5d7ce25a95d35066 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- qapi/qom.json | 14 ++++++++ target/arm/kvm-rme.c | 85 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/qapi/qom.json b/qapi/qom.json index 42de1e489b..fb35c75fd2 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -1184,6 +1184,19 @@ 'data': { '*cpu-affinity': ['uint16'], '*node-affinity': ['uint16'] } } +## +# @RmeGuestProperties: +# +# Properties for rme-guest objects. +# +# @personalization-value: a base64 string encoding a 64-byte (512-bit) value. +# This optional parameter allows to uniquely identify the VM instance +# during attestation. 
(default: all-zero) +# +# Since: 10.0 +## +{ 'struct': 'RmeGuestProperties', + 'data': { '*personalization-value': 'str' } } ## # @ObjectType: @@ -1319,6 +1332,7 @@ 'pr-manager-helper': { 'type': 'PrManagerHelperProperties', 'if': 'CONFIG_LINUX' }, 'qtest': 'QtestProperties', + 'rme-guest': 'RmeGuestProperties', 'rng-builtin': 'RngProperties', 'rng-egd': 'RngEgdProperties', 'rng-random': { 'type': 'RngRandomProperties', diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c index 0b30b3418d..272eb8cbe2 100644 --- a/target/arm/kvm-rme.c +++ b/target/arm/kvm-rme.c @@ -12,6 +12,7 @@ #include "kvm_arm.h" #include "migration/blocker.h" #include "qapi/error.h" +#include "qemu/base64.h" #include "qemu/error-report.h" #include "qom/object_interfaces.h" #include "system/confidential-guest-support.h" @@ -33,6 +34,9 @@ struct RmeGuest { Notifier rom_load_notifier; GSList *ram_regions; + char *personalization_value_str; + uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE]; + RmeRamRegion init_ram; }; @@ -42,6 +46,48 @@ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, static RmeGuest *rme_guest; +static int rme_configure_one(RmeGuest *guest, uint32_t cfg, Error **errp) +{ + int ret; + const char *cfg_str; + struct arm_rme_config args = { + .cfg = cfg, + }; + + switch (cfg) { + case ARM_RME_CONFIG_RPV: + memcpy(args.rpv, guest->personalization_value, ARM_RME_CONFIG_RPV_SIZE); + cfg_str = "personalization value"; + break; + default: + g_assert_not_reached(); + } + + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_RME_CONFIG_REALM, (intptr_t)&args); + if (ret) { + error_setg_errno(errp, -ret, "failed to configure %s", cfg_str); + } + return ret; +} + +static int rme_configure(Error **errp) +{ + int ret; + size_t option; + const uint32_t config_options[] = { + ARM_RME_CONFIG_RPV, + }; + + for (option = 0; option < ARRAY_SIZE(config_options); option++) { + ret = rme_configure_one(rme_guest, config_options[option], errp); + if (ret) 
{ + return ret; + } + } + return 0; +} + static int rme_init_ram(RmeRamRegion *ram, Error **errp) { int ret; @@ -122,6 +168,10 @@ static int rme_create_realm(Error **errp) { int ret; + if (rme_configure(errp)) { + return -1; + } + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, KVM_CAP_ARM_RME_CREATE_REALM); if (ret) { @@ -167,8 +217,43 @@ static void rme_vm_state_change(void *opaque, bool running, RunState state) } } +static char *rme_get_rpv(Object *obj, Error **errp) +{ + RmeGuest *guest = RME_GUEST(obj); + + return g_strdup(guest->personalization_value_str); +} + +static void rme_set_rpv(Object *obj, const char *value, Error **errp) +{ + RmeGuest *guest = RME_GUEST(obj); + g_autofree uint8_t *rpv; + size_t len; + + rpv = qbase64_decode(value, -1, &len, errp); + if (!rpv) { + return; + } + + if (len != sizeof(guest->personalization_value)) { + error_setg(errp, + "expecting a Realm Personalization Value of size %zu, got %zu\n", + sizeof(guest->personalization_value), len); + return; + } + memcpy(guest->personalization_value, rpv, len); + + /* Save the value so we don't need to encode it in the getter */ + g_free(guest->personalization_value_str); + guest->personalization_value_str = g_strdup(value); +} + static void rme_guest_class_init(ObjectClass *oc, const void *data) { + object_class_property_add_str(oc, "personalization-value", rme_get_rpv, + rme_set_rpv); + object_class_property_set_description(oc, "personalization-value", + "Realm personalization value (64 bytes encodede in base64)"); } static void rme_guest_init(Object *obj) From a34fe44cbe4b8c1ee9ec2a7fd08c9a82154c1f7a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 27 Oct 2022 19:22:48 +0100 Subject: [PATCH 14/35] NVIDIA: SAUCE: target/arm/kvm-rme: Add measurement algorithm property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This option selects which measurement algorithm to use for attestation. Supported values are SHA256 and SHA512. 
Default to SHA512 arbitrarily. SHA512 is generally faster on 64-bit architectures. On a few arm64 CPUs I tested SHA256 is much faster, but that's most likely because they only support acceleration via FEAT_SHA256 (Armv8.0) and not FEAT_SHA512 (Armv8.2). Future CPUs supporting RME are likely to also support FEAT_SHA512. Cc: Eric Blake Cc: Markus Armbruster Cc: Daniel P. Berrangé Cc: Eduardo Habkost Acked-by: Markus Armbruster Signed-off-by: Jean-Philippe Brucker --- TODO: switch to 256 (cherry picked from commit a9c9c91776ef951bd9b8caac93fad4b2f63c75cb https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- qapi/qom.json | 20 +++++++++++++++++++- target/arm/kvm-rme.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/qapi/qom.json b/qapi/qom.json index fb35c75fd2..5b423d5033 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -1184,6 +1184,20 @@ 'data': { '*cpu-affinity': ['uint16'], '*node-affinity': ['uint16'] } } +## +# @RmeGuestMeasurementAlgorithm: +# +# @sha256: Use the SHA256 algorithm +# +# @sha512: Use the SHA512 algorithm +# +# Algorithm to use for realm measurements +# +# Since: 10.0 +## +{ 'enum': 'RmeGuestMeasurementAlgorithm', + 'data': ['sha256', 'sha512'] } + ## # @RmeGuestProperties: # @@ -1193,10 +1207,14 @@ # This optional parameter allows to uniquely identify the VM instance # during attestation. 
(default: all-zero) # +# @measurement-algorithm: Realm measurement algorithm +# (default: sha512) +# # Since: 10.0 ## { 'struct': 'RmeGuestProperties', - 'data': { '*personalization-value': 'str' } } + 'data': { '*personalization-value': 'str', + '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm' } } ## # @ObjectType: diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c index 272eb8cbe2..c43f1adc88 100644 --- a/target/arm/kvm-rme.c +++ b/target/arm/kvm-rme.c @@ -36,6 +36,7 @@ struct RmeGuest { char *personalization_value_str; uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE]; + RmeGuestMeasurementAlgorithm measurement_algo; RmeRamRegion init_ram; }; @@ -59,6 +60,19 @@ static int rme_configure_one(RmeGuest *guest, uint32_t cfg, Error **errp) memcpy(args.rpv, guest->personalization_value, ARM_RME_CONFIG_RPV_SIZE); cfg_str = "personalization value"; break; + case ARM_RME_CONFIG_HASH_ALGO: + switch (guest->measurement_algo) { + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256: + args.hash_algo = ARM_RME_CONFIG_HASH_ALGO_SHA256; + break; + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512: + args.hash_algo = ARM_RME_CONFIG_HASH_ALGO_SHA512; + break; + default: + g_assert_not_reached(); + } + cfg_str = "hash algorithm"; + break; default: g_assert_not_reached(); } @@ -77,6 +91,7 @@ static int rme_configure(Error **errp) size_t option; const uint32_t config_options[] = { ARM_RME_CONFIG_RPV, + ARM_RME_CONFIG_HASH_ALGO, }; for (option = 0; option < ARRAY_SIZE(config_options); option++) { @@ -248,12 +263,34 @@ static void rme_set_rpv(Object *obj, const char *value, Error **errp) guest->personalization_value_str = g_strdup(value); } +static int rme_get_measurement_algo(Object *obj, Error **errp) +{ + RmeGuest *guest = RME_GUEST(obj); + + return guest->measurement_algo; +} + +static void rme_set_measurement_algo(Object *obj, int algo, Error **errp) +{ + RmeGuest *guest = RME_GUEST(obj); + + guest->measurement_algo = algo; +} + static void rme_guest_class_init(ObjectClass 
*oc, const void *data) { object_class_property_add_str(oc, "personalization-value", rme_get_rpv, rme_set_rpv); object_class_property_set_description(oc, "personalization-value", "Realm personalization value (64 bytes encodede in base64)"); + + object_class_property_add_enum(oc, "measurement-algorithm", + "RmeGuestMeasurementAlgorithm", + &RmeGuestMeasurementAlgorithm_lookup, + rme_get_measurement_algo, + rme_set_measurement_algo); + object_class_property_set_description(oc, "measurement-algorithm", + "Realm measurement algorithm ('sha256', 'sha512')"); } static void rme_guest_init(Object *obj) @@ -263,6 +300,7 @@ static void rme_guest_init(Object *obj) exit(1); } rme_guest = RME_GUEST(obj); + rme_guest->measurement_algo = RME_GUEST_MEASUREMENT_ALGORITHM_SHA512; } static void rme_guest_finalize(Object *obj) From 565d5cd2fdad95924cd8c65dd638a46e761320eb Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 4 Dec 2023 18:48:19 +0000 Subject: [PATCH 15/35] NVIDIA: SAUCE: target/arm/cpu: Set number of breakpoints and watchpoints in KVM Add "num-breakpoints" and "num-watchpoints" CPU parameters to configure the debug features that KVM presents to the guest. The KVM vCPU configuration is modified by calling SET_ONE_REG on the ID register. This is needed for Realm VMs, whose parameters include breakpoints and watchpoints, and influence the Realm Initial Measurement. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit a8ad7f59ebbe12ffd23208a86d17a79b56d5e7d5 https://git.codelinaro.org/linaro/dcap/qemu.git) [ianm: adjustment needed for def3f1c1026af66d5672f10b3e6cbb87e4e20f73 -"arm/cpu: Store aa64dfr0/1 into the idregs array"] Signed-off-by: Ian May --- target/arm/arm-qmp-cmds.c | 1 + target/arm/cpu.h | 4 ++ target/arm/cpu64.c | 77 +++++++++++++++++++++++++++++++++++++++ target/arm/kvm.c | 54 +++++++++++++++++++++++++++ target/arm/kvm_arm.h | 2 + 5 files changed, 138 insertions(+) diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c index d292c974c4..2224b4a2fe 100644 --- a/target/arm/arm-qmp-cmds.c +++ b/target/arm/arm-qmp-cmds.c @@ -97,6 +97,7 @@ static const char *cpu_model_advertised_features[] = { "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", "kvm-no-adjvtime", "kvm-steal-time", "pauth", "pauth-impdef", "pauth-qarma3", "pauth-qarma5", + "num-breakpoints", "num-watchpoints", NULL }; diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 3abd921d52..d4527bb412 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1148,6 +1148,10 @@ struct ArchCPU { /* Generic timer counter frequency, in Hz */ uint64_t gt_cntfrq_hz; + + /* Allows to override the default configuration */ + uint8_t num_bps; + uint8_t num_wps; }; typedef struct ARMCPUInfo { diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 26cf7e6dfa..037fd07dec 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -616,6 +616,82 @@ void aarch64_add_pauth_properties(Object *obj) } } +#if defined(CONFIG_KVM) +static void arm_cpu_get_num_wps(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + + val = cpu->num_wps; + if (val == 0) { + val = FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, WRPS) + 1; + } + + visit_type_uint8(v, name, &val, errp); +} + +static void arm_cpu_set_num_wps(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ 
+ uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + uint8_t max_wps = FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, WRPS) + 1; + + if (!visit_type_uint8(v, name, &val, errp)) { + return; + } + + if (val < 2 || val > max_wps) { + error_setg(errp, "invalid number of watchpoints"); + return; + } + + cpu->num_wps = val; +} + +static void arm_cpu_get_num_bps(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + + val = cpu->num_bps; + if (val == 0) { + val = FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, BRPS) + 1; + } + + visit_type_uint8(v, name, &val, errp); +} + +static void arm_cpu_set_num_bps(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + uint8_t max_bps = FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, BRPS) + 1; + + if (!visit_type_uint8(v, name, &val, errp)) { + return; + } + + if (val < 2 || val > max_bps) { + error_setg(errp, "invalid number of breakpoints"); + return; + } + + cpu->num_bps = val; +} + +static void aarch64_add_kvm_writable_properties(Object *obj) +{ + object_property_add(obj, "num-breakpoints", "uint8", arm_cpu_get_num_bps, + arm_cpu_set_num_bps, NULL, NULL); + object_property_add(obj, "num-watchpoints", "uint8", arm_cpu_get_num_wps, + arm_cpu_set_num_wps, NULL, NULL); +} +#endif /* CONFIG_KVM */ + void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) { uint64_t t; @@ -768,6 +844,7 @@ static void aarch64_host_initfn(Object *obj) if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { aarch64_add_sve_properties(obj); aarch64_add_pauth_properties(obj); + aarch64_add_kvm_writable_properties(obj); } #elif defined(CONFIG_HVF) ARMCPU *cpu = ARM_CPU(obj); diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 14ecced4db..c1793a1b64 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -855,6 +855,54 @@ static int kvm_arm_init_cpreg_list(ARMCPU *cpu) return ret; } +static void kvm_arm_configure_aa64dfr0(ARMCPU *cpu) +{ + int ret; + 
uint64_t val, newval; + CPUState *cs = CPU(cpu); + + if (!cpu->num_bps && !cpu->num_wps) { + return; + } + + newval = GET_IDREG(&cpu->isar, ID_AA64DFR0); + if (cpu->num_bps) { + uint64_t ctx_cmps = FIELD_EX64(newval, ID_AA64DFR0, CTX_CMPS); + + /* CTX_CMPs is never greater than BRPs */ + ctx_cmps = MIN(ctx_cmps, cpu->num_bps - 1); + newval = FIELD_DP64(newval, ID_AA64DFR0, BRPS, cpu->num_bps - 1); + newval = FIELD_DP64(newval, ID_AA64DFR0, CTX_CMPS, ctx_cmps); + } + if (cpu->num_wps) { + newval = FIELD_DP64(newval, ID_AA64DFR0, WRPS, cpu->num_wps - 1); + } + ret = kvm_set_one_reg(cs, KVM_REG_ARM_ID_AA64DFR0_EL1, &newval); + if (ret) { + error_report("Failed to set KVM_REG_ARM_ID_AA64DFR0_EL1"); + return; + } + + /* + * Check if the write succeeded. KVM does offer the writable mask for this + * register, but this way we also check if the value we wrote was sane. + */ + ret = kvm_get_one_reg(cs, KVM_REG_ARM_ID_AA64DFR0_EL1, &val); + if (ret) { + error_report("Failed to get KVM_REG_ARM_ID_AA64DFR0_EL1"); + return; + } + + if (val != newval) { + error_report("Failed to update KVM_REG_ARM_ID_AA64DFR0_EL1"); + } +} + +static void kvm_arm_configure_vcpu_regs(ARMCPU *cpu) +{ + kvm_arm_configure_aa64dfr0(cpu); +} + /** * kvm_arm_cpreg_level: * @regidx: KVM register index @@ -985,6 +1033,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); abort(); } + + /* + * Before loading the KVM values into CPUState, update the KVM configuration + */ + kvm_arm_configure_vcpu_regs(cpu); + if (!write_kvmstate_to_list(cpu)) { fprintf(stderr, "write_kvmstate_to_list failed\n"); abort(); diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 7ea652055f..7f084f61c0 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -17,6 +17,8 @@ #define KVM_ARM_VGIC_V2 (1 << 0) #define KVM_ARM_VGIC_V3 (1 << 1) +#define KVM_REG_ARM_ID_AA64DFR0_EL1 ARM64_SYS_REG(3, 0, 0, 5, 0) + /** * kvm_arm_register_device: * @mr: memory region for 
this device From df9a868270b4bcf1f53888212131ccadfa7e95ac Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 7 Dec 2023 17:32:13 +0000 Subject: [PATCH 16/35] NVIDIA: SAUCE: target/arm/cpu: Set number of PMU counters in KVM Add a "num-pmu-counters" CPU parameter to configure the number of counters that KVM presents to the guest. This is needed for Realm VMs, whose parameters include the number of PMU counters and influence the Realm Initial Measurement. Signed-off-by: Jean-Philippe Brucker (cherry picked from commit b50d7cdd7f34e07befa0267f750f99e1c02596df https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- target/arm/arm-qmp-cmds.c | 2 +- target/arm/cpu.h | 3 +++ target/arm/cpu64.c | 41 +++++++++++++++++++++++++++++++++++++++ target/arm/kvm.c | 34 +++++++++++++++++++++++++++++++- target/arm/kvm_arm.h | 1 + 5 files changed, 79 insertions(+), 2 deletions(-) diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c index 2224b4a2fe..1e4fad8dcd 100644 --- a/target/arm/arm-qmp-cmds.c +++ b/target/arm/arm-qmp-cmds.c @@ -97,7 +97,7 @@ static const char *cpu_model_advertised_features[] = { "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", "kvm-no-adjvtime", "kvm-steal-time", "pauth", "pauth-impdef", "pauth-qarma3", "pauth-qarma5", - "num-breakpoints", "num-watchpoints", + "num-breakpoints", "num-watchpoints", "num-pmu-counters", NULL }; diff --git a/target/arm/cpu.h b/target/arm/cpu.h index d4527bb412..ec39f74025 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1152,6 +1152,7 @@ struct ArchCPU { /* Allows to override the default configuration */ uint8_t num_bps; uint8_t num_wps; + int8_t num_pmu_ctrs; }; typedef struct ARMCPUInfo { @@ -2426,6 +2427,8 @@ FIELD(MFAR, FPA, 12, 40) FIELD(MFAR, NSE, 62, 1) FIELD(MFAR, NS, 63, 1) +FIELD(PMCR, N, 11, 5) + QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); /* If adding a feature bit which corresponds to a Linux ELF diff --git 
a/target/arm/cpu64.c b/target/arm/cpu64.c index 037fd07dec..767a47a12e 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -683,12 +683,53 @@ static void arm_cpu_set_num_bps(Object *obj, Visitor *v, const char *name, cpu->num_bps = val; } +static void arm_cpu_get_num_pmu_ctrs(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + + if (cpu->num_pmu_ctrs == -1) { + val = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N); + } else { + val = cpu->num_pmu_ctrs; + } + + visit_type_uint8(v, name, &val, errp); +} + +static void arm_cpu_set_num_pmu_ctrs(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + uint8_t max_ctrs = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N); + + if (!visit_type_uint8(v, name, &val, errp)) { + return; + } + + if (val > max_ctrs) { + error_setg(errp, "invalid number of PMU counters"); + return; + } + + cpu->num_pmu_ctrs = val; +} + static void aarch64_add_kvm_writable_properties(Object *obj) { + ARMCPU *cpu = ARM_CPU(obj); + object_property_add(obj, "num-breakpoints", "uint8", arm_cpu_get_num_bps, arm_cpu_set_num_bps, NULL, NULL); object_property_add(obj, "num-watchpoints", "uint8", arm_cpu_get_num_wps, arm_cpu_set_num_wps, NULL, NULL); + + cpu->num_pmu_ctrs = -1; + object_property_add(obj, "num-pmu-counters", "uint8", + arm_cpu_get_num_pmu_ctrs, arm_cpu_set_num_pmu_ctrs, + NULL, NULL); } #endif /* CONFIG_KVM */ diff --git a/target/arm/kvm.c b/target/arm/kvm.c index c1793a1b64..08b8b1e523 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -403,7 +403,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) if (pmu_supported) { /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */ err |= read_sys_reg64(fd, &ahcf->isar.reset_pmcr_el0, - ARM64_SYS_REG(3, 3, 9, 12, 0)); + KVM_REG_ARM_PMCR_EL0); } if (sve_supported) { @@ -898,9 +898,41 @@ static void 
kvm_arm_configure_aa64dfr0(ARMCPU *cpu) } } +static void kvm_arm_configure_pmcr(ARMCPU *cpu) +{ + int ret; + uint64_t val, newval; + CPUState *cs = CPU(cpu); + + if (cpu->num_pmu_ctrs == -1) { + return; + } + + newval = FIELD_DP64(cpu->isar.reset_pmcr_el0, PMCR, N, cpu->num_pmu_ctrs); + ret = kvm_set_one_reg(cs, KVM_REG_ARM_PMCR_EL0, &newval); + if (ret) { + error_report("Failed to set KVM_REG_ARM_PMCR_EL0"); + return; + } + + /* + * Check if the write succeeded, since older versions of KVM ignore it. + */ + ret = kvm_get_one_reg(cs, KVM_REG_ARM_PMCR_EL0, &val); + if (ret) { + error_report("Failed to get KVM_REG_ARM_PMCR_EL0"); + return; + } + + if (val != newval) { + error_report("Failed to update KVM_REG_ARM_PMCR_EL0"); + } +} + static void kvm_arm_configure_vcpu_regs(ARMCPU *cpu) { kvm_arm_configure_aa64dfr0(cpu); + kvm_arm_configure_pmcr(cpu); } /** diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 7f084f61c0..d31d724761 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -18,6 +18,7 @@ #define KVM_ARM_VGIC_V3 (1 << 1) #define KVM_REG_ARM_ID_AA64DFR0_EL1 ARM64_SYS_REG(3, 0, 0, 5, 0) +#define KVM_REG_ARM_PMCR_EL0 ARM64_SYS_REG(3, 3, 9, 12, 0) /** * kvm_arm_register_device: From 15a7b89aea2c3ce3e4fbe2e9e31276250c2e6c25 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 7 Feb 2023 13:05:40 +0000 Subject: [PATCH 17/35] NVIDIA: SAUCE: target/arm/cpu: Inform about reading confidential CPU registers The host cannot access registers of a Realm. Instead of showing all registers as zero in "info registers", display a message about this restriction. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit ebf1075f11afc6093f37881c7d528b088b87a8a8 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- target/arm/cpu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index e2b2337399..225f9cdd6d 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1199,6 +1199,11 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) const char *ns_status; bool sve; + if (cpu->kvm_rme) { + qemu_fprintf(f, "the CPU registers are confidential to the realm\n"); + return; + } + qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc); for (i = 0; i < 32; i++) { if (i == 31) { From aa17b9c8a8c0f26ed50a8731d952b5e19df80170 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 6 Feb 2023 16:49:25 +0000 Subject: [PATCH 18/35] NVIDIA: SAUCE: hw/arm/virt: Add support for Arm RME When confidential-guest-support is enabled for the virt machine, add the RME flag to the VM type. The HVC conduit for PSCI is not supported for Realms. Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 41722b7bcce535699c1925b7fbeb22af4dbef565 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/arm/virt.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 418ed77deb..bad34b0d31 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -250,6 +250,11 @@ static const int a15irqmap[] = { [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */ }; +static bool virt_machine_is_confidential(VirtMachineState *vms) +{ + return MACHINE(vms)->cgs; +} + static void create_randomness(MachineState *ms, const char *node) { struct { @@ -2299,10 +2304,11 @@ static void machvirt_init(MachineState *machine) * if the guest has EL2 then we will use SMC as the conduit, * and otherwise we will use HVC (for backwards compatibility and * because if we're using KVM then we must use HVC). 
+ * Realm guests must also use SMC. */ if (vms->secure && firmware_loaded) { vms->psci_conduit = QEMU_PSCI_CONDUIT_DISABLED; - } else if (vms->virt) { + } else if (vms->virt || virt_machine_is_confidential(vms)) { vms->psci_conduit = QEMU_PSCI_CONDUIT_SMC; } else { vms->psci_conduit = QEMU_PSCI_CONDUIT_HVC; @@ -3229,6 +3235,7 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, static int virt_kvm_type(MachineState *ms, const char *type_str) { VirtMachineState *vms = VIRT_MACHINE(ms); + int rme_vm_type = kvm_arm_rme_vm_type(ms); int max_vm_pa_size, requested_pa_size; bool fixed_ipa; @@ -3258,7 +3265,11 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) * the implicit legacy 40b IPA setting, in which case the kvm_type * must be 0. */ - return fixed_ipa ? 0 : requested_pa_size; + if (fixed_ipa) { + return 0; + } + + return requested_pa_size | rme_vm_type; } static int virt_hvf_get_physical_address_range(MachineState *ms) From 8639d054dae2127afe7fad1e673fc322e16a847c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 6 Feb 2023 16:52:37 +0000 Subject: [PATCH 19/35] NVIDIA: SAUCE: hw/arm/virt: Disable DTB randomness for confidential VMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dtb-randomness feature, which adds random seeds to the DTB, isn't really compatible with confidential VMs since it randomizes the Realm Initial Measurement. Enabling it is not an error, but it prevents attestation. It also isn't useful to a Realm, which doesn't trust host input. Currently the feature is automatically enabled, unless the user disables it on the command-line. Change it to OnOffAuto, and automatically disable it for confidential VMs, unless the user explicitly enables it. 
Signed-off-by: Jean-Philippe Brucker Reviewed-by: Philippe Mathieu-Daudé (cherry picked from commit a59b75ce6be0cd86e1e2d5a20dcbc381e791e776 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- docs/system/arm/virt.rst | 9 +++++---- hw/arm/virt.c | 41 +++++++++++++++++++++++++--------------- include/hw/arm/virt.h | 2 +- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst index 10cbffc8a7..69872186d9 100644 --- a/docs/system/arm/virt.rst +++ b/docs/system/arm/virt.rst @@ -203,10 +203,11 @@ dtb-randomness rng-seed and kaslr-seed nodes (in both "/chosen" and "/secure-chosen") to use for features like the random number generator and address space randomisation. The default is - ``on``. You will want to disable it if your trusted boot chain - will verify the DTB it is passed, since this option causes the - DTB to be non-deterministic. It would be the responsibility of - the firmware to come up with a seed and pass it on if it wants to. + ``off`` for confidential VMs, and ``on`` otherwise. You will want + to disable it if your trusted boot chain will verify the DTB it is + passed, since this option causes the DTB to be non-deterministic. + It would be the responsibility of the firmware to come up with a + seed and pass it on if it wants to. dtb-kaslr-seed A deprecated synonym for dtb-randomness. diff --git a/hw/arm/virt.c b/hw/arm/virt.c index bad34b0d31..f6dda194cb 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -285,6 +285,7 @@ static bool ns_el2_virt_timer_present(void) static void create_fdt(VirtMachineState *vms) { + bool dtb_randomness = true; MachineState *ms = MACHINE(vms); int nb_numa_nodes = ms->numa_state->num_nodes; void *fdt = create_device_tree(&vms->fdt_size); @@ -294,6 +295,16 @@ static void create_fdt(VirtMachineState *vms) exit(1); } + /* + * Including random data in the DTB causes random initial measurement on CCA, + * so disable it for confidential VMs.
+ */ + if (vms->dtb_randomness == ON_OFF_AUTO_OFF || + (vms->dtb_randomness == ON_OFF_AUTO_AUTO && + virt_machine_is_confidential(vms))) { + dtb_randomness = false; + } + ms->fdt = fdt; /* Header */ @@ -315,13 +326,13 @@ static void create_fdt(VirtMachineState *vms) /* /chosen must exist for load_dtb to fill in necessary properties later */ qemu_fdt_add_subnode(fdt, "/chosen"); - if (vms->dtb_randomness) { + if (dtb_randomness) { create_randomness(ms, "/chosen"); } if (vms->secure) { qemu_fdt_add_subnode(fdt, "/secure-chosen"); - if (vms->dtb_randomness) { + if (dtb_randomness) { create_randomness(ms, "/secure-chosen"); } } @@ -2743,18 +2754,21 @@ static void virt_set_its(Object *obj, bool value, Error **errp) vms->its = value; } -static bool virt_get_dtb_randomness(Object *obj, Error **errp) +static void virt_get_dtb_randomness(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); + OnOffAuto dtb_randomness = vms->dtb_randomness; - return vms->dtb_randomness; + visit_type_OnOffAuto(v, name, &dtb_randomness, errp); } -static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +static void virt_set_dtb_randomness(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); - vms->dtb_randomness = value; + visit_type_OnOffAuto(v, name, &vms->dtb_randomness, errp); } static char *virt_get_oem_id(Object *obj, Error **errp) @@ -3479,16 +3493,16 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) "Set on/off to enable/disable " "ITS instantiation"); - object_class_property_add_bool(oc, "dtb-randomness", - virt_get_dtb_randomness, - virt_set_dtb_randomness); + object_class_property_add(oc, "dtb-randomness", "OnOffAuto", + virt_get_dtb_randomness, virt_set_dtb_randomness, + NULL, NULL); object_class_property_set_description(oc, "dtb-randomness", "Set off to disable passing random or " "non-deterministic dtb nodes to 
guest"); - object_class_property_add_bool(oc, "dtb-kaslr-seed", - virt_get_dtb_randomness, - virt_set_dtb_randomness); + object_class_property_add(oc, "dtb-kaslr-seed", "OnOffAuto", + virt_get_dtb_randomness, virt_set_dtb_randomness, + NULL, NULL); object_class_property_set_description(oc, "dtb-kaslr-seed", "Deprecated synonym of dtb-randomness"); @@ -3551,9 +3565,6 @@ static void virt_instance_init(Object *obj) /* MTE is disabled by default. */ vms->mte = false; - /* Supply kaslr-seed and rng-seed by default */ - vms->dtb_randomness = true; - vms->irqmap = a15irqmap; virt_flash_create(vms); diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 0963356fc2..20411e40ba 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -153,7 +153,7 @@ struct VirtMachineState { bool virt; bool ras; bool mte; - bool dtb_randomness; + OnOffAuto dtb_randomness; bool second_ns_uart_present; OnOffAuto acpi; VirtGICType gic_version; From a3e353b66a90fe40f397745e51476899b0dfb72b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 6 Feb 2023 16:56:39 +0000 Subject: [PATCH 20/35] NVIDIA: SAUCE: hw/arm/virt: Reserve one bit of guest-physical address for RME When RME is enabled, the upper GPA bit is used to distinguish protected from unprotected addresses. Reserve it when setting up the guest memory map. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 1368d216fdb9d5055c182f213dfeeebfb293af59 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/arm/virt.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index f6dda194cb..2c71928b8c 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -3251,14 +3251,24 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) VirtMachineState *vms = VIRT_MACHINE(ms); int rme_vm_type = kvm_arm_rme_vm_type(ms); int max_vm_pa_size, requested_pa_size; + int rme_reserve_bit = 0; bool fixed_ipa; - max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa); + if (rme_vm_type) { + /* + * With RME, the upper GPA bit differentiates Realm from NS memory. + * Reserve the upper bit to ensure that highmem devices will fit. + */ + rme_reserve_bit = 1; + } + + max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa) - + rme_reserve_bit; /* we freeze the memory map to compute the highest gpa */ virt_set_memmap(vms, max_vm_pa_size); - requested_pa_size = 64 - clz64(vms->highest_gpa); + requested_pa_size = 64 - clz64(vms->highest_gpa) + rme_reserve_bit; /* * KVM requires the IPA size to be at least 32 bits. @@ -3267,11 +3277,11 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) requested_pa_size = 32; } - if (requested_pa_size > max_vm_pa_size) { + if (requested_pa_size > max_vm_pa_size + rme_reserve_bit) { error_report("-m and ,maxmem option values " "require an IPA range (%d bits) larger than " "the one supported by the host (%d bits)", - requested_pa_size, max_vm_pa_size); + requested_pa_size, max_vm_pa_size + rme_reserve_bit); return -1; } /* From 6f84a3a2d659b8974e81822abcfbb421f28c2e65 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 14 Jun 2023 16:36:52 +0100 Subject: [PATCH 21/35] NVIDIA: SAUCE: hw/arm/boot: Mark all guest memory as RIPAS_RAM. 
All Realm IPA states are by default RIPAS_EMPTY, and accessing them in that state causes injection of synchronous exception. Either the loader or the guest needs to set IPA state to RIPAS_RAM before accessing it. Since a Linux guest needs all memory ready at boot [1], initialize it here. [1] https://docs.kernel.org/arch/arm64/booting.html https://lore.kernel.org/all/20241004144307.66199-12-steven.price@arm.com/ Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 41e96f8d1a7727fc9cb4d3a1674b672fc4121942 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/arm/boot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index c56dfb6c19..c6637a1fc7 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -31,6 +31,7 @@ #include "qemu/option.h" #include "qemu/units.h" #include "qemu/bswap.h" +#include "kvm_arm.h" #include #include "qapi/error.h" @@ -1354,6 +1355,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) /* We assume the CPU passed as argument is the primary CPU. */ info->primary_cpu = cpu; + /* Mark all Realm memory as RAM */ + kvm_arm_rme_init_guest_ram(info->loader_start, info->ram_size); + /* Load the kernel. */ if (!info->kernel_filename || info->firmware_loaded) { arm_setup_firmware_boot(cpu, info); From 36e5cca9946a7813eeb84f0be9816c234e31393f Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 8 Jan 2025 17:34:11 +0000 Subject: [PATCH 22/35] NVIDIA: SAUCE: target/arm/kvm-rme: Add DMA remapping for the shared memory region In Arm CCA, the guest-physical address space is split in half. The top half represents memory shared between guest and host, and the bottom half is private to the guest. From QEMU's point of view, the two halves are merged into a single region, and pages within this region are either shared or private. Addresses used by device DMA can potentially target both halves. 
Physical devices assigned to the VM access the top half, until they are authenticated using features like PCIe CMA-SPDM at which point they can also access memory private to the guest. Virtual devices implemented by the host are only allowed to access the top half. For emulated MMIO, KVM strips the GPA before returning to QEMU, so the GPA already belongs to QEMU's merged view of guest memory. However DMA addresses cannot be stripped this way and need special handling by the VMM: * When emulating DMA the VMM needs to translate the addresses into its merged view. Add an IOMMU memory region on the top half, that retargets DMA accesses to the merged sysmem. * when creating IOMMU mappings for (unauthenticated) VFIO devices, the VMM needs to map the top half of guest-physical addresses to the shared pages. Install RAM discard listeners that issue IOMMU map and unmap requests to IOMMU listeners such as VFIO. The resulting mtree looks like this: address-space: vfio-pci 0000000000000000-ffffffffffffffff (prio 0, i/o): bus master container 0000000000000000-000001ffffffffff (prio 0, i/o): alias bus master @realm-dma-region 0000000000000000-000001ffffffffff memory-region: realm-dma-region 0000000000000000-000001ffffffffff (prio 0, i/o): realm-dma-region There are at least two problems with this approach: given that we use the PCI bus master address space, a vIOMMU cannot install its own address space at the moment. And since sysbus devices can't have an IOMMU at the moment, DMA from non-PCI devices isn't supported. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit b0bbe62ff73bc3be5943fabe01e5dd857bcb6ada https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/arm/virt.c | 2 + target/arm/kvm-rme.c | 227 ++++++++++++++++++++++++++++++++++++++++++ target/arm/kvm-stub.c | 5 + target/arm/kvm_arm.h | 10 ++ 4 files changed, 244 insertions(+) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 2c71928b8c..2460b2637d 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2594,6 +2594,8 @@ static void machvirt_init(MachineState *machine) vms->fw_cfg, OBJECT(vms)); } + kvm_arm_rme_init_gpa_space(vms->highest_gpa, vms->bus); + vms->bootinfo.ram_size = machine->ram_size; vms->bootinfo.board_id = -1; vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base; diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c index c43f1adc88..9978120007 100644 --- a/target/arm/kvm-rme.c +++ b/target/arm/kvm-rme.c @@ -9,6 +9,7 @@ #include "hw/boards.h" #include "hw/core/cpu.h" #include "hw/loader.h" +#include "hw/pci/pci.h" #include "kvm_arm.h" #include "migration/blocker.h" #include "qapi/error.h" @@ -24,6 +25,35 @@ OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST) #define RME_PAGE_SIZE qemu_real_host_page_size() +/* + * Realms have a split guest-physical address space: the bottom half is private + * to the realm, and the top half is shared with the host. Within QEMU, we use a + * merged view of both halves. Most of RAM is private to the guest and not + * accessible to us, but the guest shares some pages with us. + * + * For DMA, devices generally target the shared half (top) of the guest address + * space. Only the devices trusted by the guest (using mechanisms like TDISP for + * device authentication) can access the bottom half. + * + * RealmDmaRegion performs remapping of top-half accesses to system memory. 
+ */ +struct RealmDmaRegion { + IOMMUMemoryRegion parent_obj; +}; + +#define TYPE_REALM_DMA_REGION "realm-dma-region" +OBJECT_DECLARE_SIMPLE_TYPE(RealmDmaRegion, REALM_DMA_REGION) +OBJECT_DEFINE_SIMPLE_TYPE(RealmDmaRegion, realm_dma_region, + REALM_DMA_REGION, IOMMU_MEMORY_REGION); + +typedef struct RealmRamDiscardListener { + MemoryRegion *mr; + hwaddr offset_within_address_space; + uint64_t granularity; + RamDiscardListener listener; + QLIST_ENTRY(RealmRamDiscardListener) rrdl_next; +} RealmRamDiscardListener; + typedef struct { hwaddr base; hwaddr size; @@ -39,6 +69,12 @@ struct RmeGuest { RmeGuestMeasurementAlgorithm measurement_algo; RmeRamRegion init_ram; + uint8_t ipa_bits; + + RealmDmaRegion *dma_region; + QLIST_HEAD(, RealmRamDiscardListener) ram_discard_list; + MemoryListener memory_listener; + AddressSpace dma_as; }; OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, @@ -305,6 +341,7 @@ static void rme_guest_init(Object *obj) static void rme_guest_finalize(Object *obj) { + memory_listener_unregister(&rme_guest->memory_listener); } static gint rme_compare_ram_regions(gconstpointer a, gconstpointer b) @@ -405,3 +442,193 @@ int kvm_arm_rme_vm_type(MachineState *ms) } return 0; } + +static int rme_ram_discard_notify(RamDiscardListener *rdl, + MemoryRegionSection *section, + bool populate) +{ + hwaddr gpa, next; + IOMMUTLBEvent event; + const hwaddr end = section->offset_within_address_space + + int128_get64(section->size); + const hwaddr address_mask = MAKE_64BIT_MASK(0, rme_guest->ipa_bits - 1); + RealmRamDiscardListener *rrdl = container_of(rdl, RealmRamDiscardListener, + listener); + + assert(rme_guest->dma_region != NULL); + + event.type = populate ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; + event.entry.target_as = &address_space_memory; + event.entry.perm = populate ? 
IOMMU_RW : IOMMU_NONE; + event.entry.addr_mask = rrdl->granularity - 1; + + assert(end <= address_mask); + + /* + * Create IOMMU mappings from the top half of the address space to the RAM + * region. + */ + for (gpa = section->offset_within_address_space; gpa < end; gpa = next) { + event.entry.iova = gpa + address_mask + 1; + event.entry.translated_addr = gpa; + memory_region_notify_iommu(IOMMU_MEMORY_REGION(rme_guest->dma_region), + 0, event); + + next = ROUND_UP(gpa + 1, rrdl->granularity); + next = MIN(next, end); + } + + return 0; +} + +static int rme_ram_discard_notify_populate(RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + return rme_ram_discard_notify(rdl, section, /* populate */ true); +} + +static void rme_ram_discard_notify_discard(RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + rme_ram_discard_notify(rdl, section, /* populate */ false); +} + +/* Install a RAM discard listener */ +static void rme_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + RealmRamDiscardListener *rrdl; + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + + if (!rdm) { + return; + } + + rrdl = g_new0(RealmRamDiscardListener, 1); + rrdl->mr = section->mr; + rrdl->offset_within_address_space = section->offset_within_address_space; + rrdl->granularity = ram_discard_manager_get_min_granularity(rdm, + section->mr); + QLIST_INSERT_HEAD(&rme_guest->ram_discard_list, rrdl, rrdl_next); + + ram_discard_listener_init(&rrdl->listener, + rme_ram_discard_notify_populate, + rme_ram_discard_notify_discard, true); + ram_discard_manager_register_listener(rdm, &rrdl->listener, section); +} + +static void rme_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + RealmRamDiscardListener *rrdl; + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + + if (!rdm) { + return; + } + + QLIST_FOREACH(rrdl, &rme_guest->ram_discard_list, rrdl_next) { + if (rrdl->mr == 
section->mr && rrdl->offset_within_address_space == + section->offset_within_address_space) { + ram_discard_manager_unregister_listener(rdm, &rrdl->listener); + g_free(rrdl); + break; + } + } +} + +static AddressSpace *rme_dma_get_address_space(PCIBus *bus, void *opaque, + int devfn) +{ + return &rme_guest->dma_as; +} + +static const PCIIOMMUOps rme_dma_ops = { + .get_address_space = rme_dma_get_address_space, +}; + +void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus) +{ + RealmDmaRegion *dma_region; + const unsigned int ipa_bits = 64 - clz64(highest_gpa) + 1; + + if (!rme_guest) { + return; + } + + assert(ipa_bits < 64); + + /* + * Setup a DMA translation from the shared top half of the guest-physical + * address space to our merged view of RAM. + */ + dma_region = g_new0(RealmDmaRegion, 1); + + memory_region_init_iommu(dma_region, sizeof(*dma_region), + TYPE_REALM_DMA_REGION, OBJECT(rme_guest), + "realm-dma-region", 1ULL << ipa_bits); + address_space_init(&rme_guest->dma_as, MEMORY_REGION(dma_region), + TYPE_REALM_DMA_REGION); + rme_guest->dma_region = dma_region; + + pci_setup_iommu(pci_bus, &rme_dma_ops, NULL); + + /* + * Install notifiers to forward RAM discard changes to the IOMMU notifiers + * (ie. tell VFIO to map shared pages and unmap private ones). 
+ */ + rme_guest->memory_listener = (MemoryListener) { + .name = "rme", + .region_add = rme_listener_region_add, + .region_del = rme_listener_region_del, + }; + memory_listener_register(&rme_guest->memory_listener, + &address_space_memory); + + rme_guest->ipa_bits = ipa_bits; +} + +static void realm_dma_region_init(Object *obj) +{ +} + +static IOMMUTLBEntry realm_dma_region_translate(IOMMUMemoryRegion *mr, + hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + const hwaddr address_mask = MAKE_64BIT_MASK(0, rme_guest->ipa_bits - 1); + IOMMUTLBEntry entry = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = addr & address_mask, + /* + * Somewhat arbitrary granule for users that need one, such as + * address_space_get_iotlb_entry(). Should be relatively large to + * avoid frequent TLB misses. It can't be larger than memory region + * alignment (eg. address_mask) because that would mask the whole + * address, preventing vhost from finding the correct memory region. + */ + .addr_mask = 4 * KiB - 1, + .perm = IOMMU_RW, + }; + + return entry; +} + +static void realm_dma_region_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) +{ + /* Nothing is shared at boot */ +} + +static void realm_dma_region_finalize(Object *obj) +{ +} + +static void realm_dma_region_class_init(ObjectClass *oc, const void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(oc); + + imrc->translate = realm_dma_region_translate; + imrc->replay = realm_dma_region_replay; +} diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c index b9e7634c3b..f57b26f679 100644 --- a/target/arm/kvm-stub.c +++ b/target/arm/kvm-stub.c @@ -56,6 +56,11 @@ void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size) { } +void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, + PCIBus *pci_bus) +{ +} + /* * These functions should never actually be called without KVM support. 
*/ diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index d31d724761..25ac4e33db 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -310,4 +310,14 @@ int kvm_arm_rme_vcpu_init(CPUState *cs); */ void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size); +/** + * kvm_arm_rme_init_gpa_space + * @highest_gpa: highest address of the lower half of the guest address space + * @pci_bus: The main PCI bus, for which PCI queries DMA address spaces + * + * Setup the guest-physical address space for a Realm. Install a memory region + * and notifier to manage the shared upper half of the address space. + */ +void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus); + #endif From 7dcc4818f2b63dd2be2078594f09b35cf89bbddf Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 12 Aug 2022 11:53:11 +0100 Subject: [PATCH 23/35] NVIDIA: SAUCE: hw/arm/virt: Move virt_flash_create() to machvirt_init() For confidential VMs we'll want to skip flash device creation. Unfortunately, in virt_instance_init() the machine->cgs member has not yet been initialized, so we cannot check whether confidential guest is enabled. Move virt_flash_create() to machvirt_init(), where we can access the machine->cgs member.
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit a158ed3493938b9d958dd64d3ee43b03030f0f9b https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/arm/virt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 2460b2637d..2b4e2296d4 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2258,6 +2258,8 @@ static void machvirt_init(MachineState *machine) unsigned int smp_cpus = machine->smp.cpus; unsigned int max_cpus = machine->smp.max_cpus; + virt_flash_create(vms); + possible_cpus = mc->possible_cpu_arch_ids(machine); /* @@ -3579,8 +3581,6 @@ static void virt_instance_init(Object *obj) vms->irqmap = a15irqmap; - virt_flash_create(vms); - vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); cxl_machine_init(obj, &vms->cxl_devices_state); From 00d30ebb24e79ba4c8ae8acc190383a935fe1dd2 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 12 Aug 2022 12:08:58 +0100 Subject: [PATCH 24/35] NVIDIA: SAUCE: hw/arm/virt: Use RAM instead of flash for confidential guest firmware The flash device that holds firmware code relies on read-only stage-2 mappings. Read accesses behave as RAM and write accesses as MMIO. Since the RMM does not support read-only mappings we cannot use the flash device as-is. That isn't a problem because the firmware does not want to disclose any information to the host, hence will not store its variables in clear persistent memory. We can therefore replace the flash device with RAM, and load the firmware there. 
Signed-off-by: Jean-Philippe Brucker (cherry picked from commit f00acbc1709d5064dad7a8bc36dffef601dfe699 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/arm/boot.c | 32 ++++++++++++++++++++++++++++-- hw/arm/virt.c | 45 +++++++++++++++++++++++++++++++++++++++++++ include/hw/arm/boot.h | 9 +++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index c6637a1fc7..397bf5a4ef 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -1263,7 +1263,31 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, } } -static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info) +static void arm_setup_confidential_firmware_boot(ARMCPU *cpu, + struct arm_boot_info *info, + const char *firmware_filename) +{ + ssize_t fw_size; + g_autofree char *fname = NULL; /* resolved firmware path, freed on return */ + AddressSpace *as = arm_boot_address_space(cpu, info); + + fname = qemu_find_file(QEMU_FILE_TYPE_BIOS, firmware_filename); + if (!fname) { + error_report("Could not find firmware image '%s'", firmware_filename); + exit(1); + } + + fw_size = load_image_targphys_as(fname, + info->firmware_base, + info->firmware_max_size, as); + if (fw_size <= 0) { + error_report("could not load firmware '%s'", firmware_filename); + exit(1); + } +} + +static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, + const char *firmware_filename) { /* Set up for booting firmware (which might load a kernel via fw_cfg) */ @@ -1314,6 +1338,10 @@ static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info) } } + if (info->confidential) { + arm_setup_confidential_firmware_boot(cpu, info, firmware_filename); + } + /* * We will start from address 0 (typically a boot ROM image) in the * same way as hardware. Leave env->boot_info NULL, so that @@ -1360,7 +1388,7 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) /* Load the kernel.
*/ if (!info->kernel_filename || info->firmware_loaded) { - arm_setup_firmware_boot(cpu, info); + arm_setup_firmware_boot(cpu, info, ms->firmware); } else { arm_setup_direct_kernel_boot(cpu, info); } diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 2b4e2296d4..50d2edd59a 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1270,6 +1270,10 @@ static PFlashCFI01 *virt_flash_create1(VirtMachineState *vms, static void virt_flash_create(VirtMachineState *vms) { + if (virt_machine_is_confidential(vms)) { + return; + } + vms->flash[0] = virt_flash_create1(vms, "virt.flash0", "pflash0"); vms->flash[1] = virt_flash_create1(vms, "virt.flash1", "pflash1"); } @@ -1305,6 +1309,10 @@ static void virt_flash_map(VirtMachineState *vms, hwaddr flashsize = vms->memmap[VIRT_FLASH].size / 2; hwaddr flashbase = vms->memmap[VIRT_FLASH].base; + if (virt_machine_is_confidential(vms)) { + return; + } + virt_flash_map1(vms->flash[0], flashbase, flashsize, secure_sysmem); virt_flash_map1(vms->flash[1], flashbase + flashsize, flashsize, @@ -1320,6 +1328,10 @@ static void virt_flash_fdt(VirtMachineState *vms, MachineState *ms = MACHINE(vms); char *nodename; + if (virt_machine_is_confidential(vms)) { + return; + } + if (sysmem == secure_sysmem) { /* Report both flash devices as a single node in the DT */ nodename = g_strdup_printf("/flash@%" PRIx64, flashbase); @@ -1355,6 +1367,27 @@ static void virt_flash_fdt(VirtMachineState *vms, } } +static bool virt_confidential_firmware_init(VirtMachineState *vms, + MemoryRegion *sysmem) +{ + MemoryRegion *fw_ram; + hwaddr fw_base = vms->memmap[VIRT_FLASH].base; + hwaddr fw_size = vms->memmap[VIRT_FLASH].size; + + if (!MACHINE(vms)->firmware) { + return false; + } + + assert(machine_require_guest_memfd(MACHINE(vms))); + + fw_ram = g_new(MemoryRegion, 1); + memory_region_init_ram_guest_memfd(fw_ram, NULL, "fw_ram", fw_size, + &error_fatal); + memory_region_add_subregion(sysmem, fw_base, fw_ram); + + return true; +} + static bool 
virt_firmware_init(VirtMachineState *vms, MemoryRegion *sysmem, MemoryRegion *secure_sysmem) @@ -1363,6 +1396,15 @@ static bool virt_firmware_init(VirtMachineState *vms, const char *bios_name; BlockBackend *pflash_blk0; + /* + * For a confidential VM, the firmware image and any boot information, + * including EFI variables, are stored in RAM in order to be measurable and + * private. Create a RAM region and load the firmware image there. + */ + if (virt_machine_is_confidential(vms)) { + return virt_confidential_firmware_init(vms, sysmem); + } + /* Map legacy -drive if=pflash to machine properties */ for (i = 0; i < ARRAY_SIZE(vms->flash); i++) { pflash_cfi01_legacy_drive(vms->flash[i], @@ -2604,7 +2646,10 @@ static void machvirt_init(MachineState *machine) vms->bootinfo.get_dtb = machvirt_dtb; vms->bootinfo.skip_dtb_autoload = true; vms->bootinfo.firmware_loaded = firmware_loaded; + vms->bootinfo.firmware_base = vms->memmap[VIRT_FLASH].base; + vms->bootinfo.firmware_max_size = vms->memmap[VIRT_FLASH].size; vms->bootinfo.psci_conduit = vms->psci_conduit; + vms->bootinfo.confidential = virt_machine_is_confidential(vms); arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo); vms->machine_done.notify = virt_machine_done; diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h index a2e22bda8a..8a1bb11069 100644 --- a/include/hw/arm/boot.h +++ b/include/hw/arm/boot.h @@ -112,6 +112,10 @@ struct arm_boot_info { */ bool firmware_loaded; + /* Used when loading firmware into RAM */ + hwaddr firmware_base; + hwaddr firmware_max_size; + /* Address at which board specific loader/setup code exists. If enabled, * this code-blob will run before anything else. It must return to the * caller via the link register. There is no stack set up. Enabled by @@ -135,6 +139,11 @@ struct arm_boot_info { /* CPU having load the kernel and that should be the first to boot. */ ARMCPU *primary_cpu; + + /* + * Confidential guest boot loads everything into RAM so it can be measured. 
+ */ + bool confidential; }; /** From 2a10eb4688e1267cb0ff86f1a87e889d4889b9c2 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 16 Apr 2025 13:40:08 +0100 Subject: [PATCH 25/35] NVIDIA: SAUCE: docs/interop/firmware.json: Add arm-rme firmware feature Some distributions provide packages containing firmware to be run under QEMU, such as "qemu-efi-aarch64" or "edk2-aarch64". Those packages also contain descriptors in /usr/share/qemu/firmware/*.json listing the firmware features, so that environments like libvirt can figure out which firmware they can load. Define an optional feature for arm64 firmware to indicate that a firmware supports running in a Realm. Firmware implementations need extra support for running in a Realm, in particular to distinguish shared from private guest memory. Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 248ac3e78c1881453d378a0c08e85267ff1e2e25 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- docs/interop/firmware.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json index 6bbe2cce0a..38ee8ef27e 100644 --- a/docs/interop/firmware.json +++ b/docs/interop/firmware.json @@ -159,6 +159,9 @@ # options related to this feature are documented in # "docs/system/i386/amd-memory-encryption.rst". # +# @arm-rme: The firmware supports running in a Realm, under the Arm Realm +# Management Extension (RME). +# # @intel-tdx: The firmware supports running under Intel Trust Domain # Extensions (TDX).
# @@ -237,7 +240,7 @@ { 'enum' : 'FirmwareFeature', 'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'amd-sev-es', 'amd-sev-snp', - 'intel-tdx', + 'arm-rme', 'intel-tdx', 'enrolled-keys', 'requires-smm', 'secure-boot', 'host-uefi-vars', 'verbose-dynamic', 'verbose-static' ] } From 4dc7c424d94feb2a50e2a287002c09767b167062 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 21 Feb 2024 13:58:14 +0000 Subject: [PATCH 26/35] NVIDIA: SAUCE: hw/arm/boot: Load DTB as is for confidential VMs For confidential VMs it may be necessary to measure the DTB, to ensure a malicious host does not insert harmful information in there. In case an external tool has generated and measured the DTB, load it as is without patching it. Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 8d9f6274bbaa65234603277ee5a971e4de660015 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/arm/boot.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 397bf5a4ef..4b8ee5ffab 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -632,7 +632,14 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, char **node_path; Error *err = NULL; - if (binfo->dtb_filename) { + if (binfo->dtb_filename && binfo->confidential) { + /* + * If the user is providing a DTB for a confidential VM, it is already + * tailored to this configuration and measured. Load it as is, without + * any modification.
+ */ + return rom_add_file_fixed_as(binfo->dtb_filename, addr, -1, as); + } else if (binfo->dtb_filename) { char *filename; filename = qemu_find_file(QEMU_FILE_TYPE_DTB, binfo->dtb_filename); if (!filename) { From dc9efb468ae67e21c6effc98d371f07a3f43e41e Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 26 Apr 2024 16:11:59 +0100 Subject: [PATCH 27/35] NVIDIA: SAUCE: hw/arm/boot: Skip bootloader for confidential guests An independent verifier needs to reconstruct the content of guest memory in order to attest that it is running trusted code. To avoid having to reconstruct the bootloader generated by QEMU, skip this step and jump directly to the kernel, with the DTB address in x0 as specified by the Linux boot protocol [1]. [1] https://docs.kernel.org/arch/arm64/booting.html Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 7fd24221e2c996a29eb4665053d38e5fdd5df286 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/arm/boot.c | 23 +++++++++++++++++------ hw/arm/virt.c | 1 + include/hw/arm/boot.h | 6 ++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 4b8ee5ffab..5cf28945de 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -875,7 +875,13 @@ static void do_cpu_reset(void *opaque) if (cpu == info->primary_cpu) { AddressSpace *as = arm_boot_address_space(cpu, info); - cpu_set_pc(cs, info->loader_start); + if (info->skip_bootloader) { + assert(is_a64(env)); + env->xregs[0] = info->dtb_start; + cpu_set_pc(cs, info->entry); + } else { + cpu_set_pc(cs, info->loader_start); + } if (!have_dtb(info)) { if (old_param) { @@ -973,7 +979,8 @@ static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry, } static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, - hwaddr *entry, AddressSpace *as) + hwaddr *entry, AddressSpace *as, + bool skip_bootloader) { hwaddr kernel_load_offset = KERNEL64_LOAD_ADDR; uint64_t kernel_size = 0; @@ -1025,7 
+1032,8 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, * bootloader, we can just load it starting at 2MB+offset rather * than 0MB + offset. */ - if (kernel_load_offset < BOOTLOADER_MAX_SIZE) { + if (kernel_load_offset < BOOTLOADER_MAX_SIZE && + !skip_bootloader) { kernel_load_offset += 2 * MiB; } } @@ -1109,7 +1117,8 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, } if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && kernel_size < 0) { kernel_size = load_aarch64_image(info->kernel_filename, - info->loader_start, &entry, as); + info->loader_start, &entry, as, + info->skip_bootloader); is_linux = 1; if (kernel_size >= 0) { image_low_addr = entry; @@ -1249,8 +1258,10 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, fixupcontext[FIXUP_ENTRYPOINT_LO] = entry; fixupcontext[FIXUP_ENTRYPOINT_HI] = entry >> 32; - arm_write_bootloader("bootloader", as, info->loader_start, - primary_loader, fixupcontext); + if (!info->skip_bootloader) { + arm_write_bootloader("bootloader", as, info->loader_start, + primary_loader, fixupcontext); + } if (info->write_board_setup) { info->write_board_setup(cpu, info); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 50d2edd59a..0203f12a3a 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2650,6 +2650,7 @@ static void machvirt_init(MachineState *machine) vms->bootinfo.firmware_max_size = vms->memmap[VIRT_FLASH].size; vms->bootinfo.psci_conduit = vms->psci_conduit; vms->bootinfo.confidential = virt_machine_is_confidential(vms); + vms->bootinfo.skip_bootloader = vms->bootinfo.confidential; arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo); vms->machine_done.notify = virt_machine_done; diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h index 8a1bb11069..6a07dff99d 100644 --- a/include/hw/arm/boot.h +++ b/include/hw/arm/boot.h @@ -140,6 +140,12 @@ struct arm_boot_info { /* CPU having load the kernel and that should be the first to boot. 
*/ ARMCPU *primary_cpu; + /* + * Instead of starting in a small bootloader that jumps to the kernel, + * immediately start in the kernel. + */ + bool skip_bootloader; + /* * Confidential guest boot loads everything into RAM so it can be measured. */ From e8aea40e9b0b6d818f5416e4550f75ce60f493cd Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 7 Nov 2024 13:11:56 +0000 Subject: [PATCH 28/35] NVIDIA: SAUCE: hw/tpm: Add TPM event log Provide a library allowing the VMM to create an event log that describes what is loaded into memory. During remote attestation in confidential computing this helps an independent verifier reconstruct the initial measurements of a VM, which contain the initial state of memory and CPUs. We provide some definitions and structures described by the Trusted Computing Group (TCG) in "TCG PC Client Platform Firmware Profile Specification" Level 00 Version 1.06 Revision 52 [1]. This is the same format as used by UEFI, and UEFI could reuse this log after finding it in DT or ACPI tables, but can also copy its content into a new one. 
[1] https://trustedcomputinggroup.org/resource/pc-client-specific-platform-firmware-profile-specification/ Cc: Stefan Berger Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 8720d1c59bc738711b0e2bc4b656ad9cd0beba44 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/tpm/Kconfig | 4 + hw/tpm/meson.build | 1 + hw/tpm/tpm_log.c | 325 +++++++++++++++++++++++++++++++++++++++ include/hw/tpm/tpm_log.h | 89 +++++++++++ qapi/tpm.json | 14 ++ 5 files changed, 433 insertions(+) create mode 100644 hw/tpm/tpm_log.c create mode 100644 include/hw/tpm/tpm_log.h diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig index a46663288c..70694b14a3 100644 --- a/hw/tpm/Kconfig +++ b/hw/tpm/Kconfig @@ -30,3 +30,7 @@ config TPM_SPAPR default y depends on TPM && PSERIES select TPM_BACKEND + +config TPM_LOG + bool + default y diff --git a/hw/tpm/meson.build b/hw/tpm/meson.build index 6968e60b3f..81efb557f3 100644 --- a/hw/tpm/meson.build +++ b/hw/tpm/meson.build @@ -6,4 +6,5 @@ system_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_crb.c')) system_ss.add(when: 'CONFIG_TPM_TIS', if_true: files('tpm_ppi.c')) system_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_ppi.c')) +system_ss.add(when: 'CONFIG_TPM_LOG', if_true: files('tpm_log.c')) specific_ss.add(when: 'CONFIG_TPM_SPAPR', if_true: files('tpm_spapr.c')) diff --git a/hw/tpm/tpm_log.c b/hw/tpm/tpm_log.c new file mode 100644 index 0000000000..93829a156d --- /dev/null +++ b/hw/tpm/tpm_log.c @@ -0,0 +1,325 @@ +/* + * tpm_log.c - Event log as described by the Trusted Computing Group (TCG) + * + * Copyright (c) 2024 Linaro Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + * Create an event log in the format specified by: + * + * TCG PC Client Platform Firmware Profile Specification + * Level 00 Version 1.06 Revision 52 + * Family “2.0” + */ + +#include "qemu/osdep.h" + +#include "crypto/hash.h" +#include "hw/tpm/tpm_log.h" +#include "qapi/error.h" +#include "qemu/bswap.h" +#include "qom/object_interfaces.h" +#include "system/address-spaces.h" +#include "system/memory.h" + +/* + * Legacy structure used only in the first event in the log, for compatibility + */ +struct TcgPcClientPcrEvent { + uint32_t pcr_index; + uint32_t event_type; + uint8_t digest[20]; + uint32_t event_data_size; + uint8_t event[]; +} QEMU_PACKED; + +struct TcgEfiSpecIdEvent { + uint8_t signature[16]; + uint32_t platform_class; + uint8_t family_version_minor; + uint8_t family_version_major; + uint8_t spec_revision; + uint8_t uintn_size; + uint32_t number_of_algorithms; /* 1 */ + /* + * For now we declare a single algo, but if we want UEFI to reuse this + * header then we'd need to add entries here for all algos supported by + * UEFI (and expand the digest field for EV_NO_ACTION). 
+ */ + uint16_t algorithm_id; + uint16_t digest_size; + uint8_t vendor_info_size; + uint8_t vendor_info[]; +} QEMU_PACKED; + +struct TcgPcrEvent2Head { + uint32_t pcr_index; + uint32_t event_type; + /* variable-sized digests */ + uint8_t digests[]; +} QEMU_PACKED; + +struct TcgPcrEvent2Tail { + uint32_t event_size; + uint8_t event[]; +} QEMU_PACKED; + +struct TpmlDigestValues { + uint32_t count; /* 1 */ + uint16_t hash_alg; + uint8_t digest[]; +} QEMU_PACKED; + +struct TpmLog { + Object parent_obj; + + TpmLogDigestAlgo digest_algo; + size_t max_size; + uint64_t load_addr; + + uint16_t tcg_algo; + GByteArray *content; + uint8_t *digest; + size_t digest_size; +}; + +OBJECT_DEFINE_SIMPLE_TYPE(TpmLog, tpm_log, TPM_LOG, OBJECT) + +static void tpm_log_init(Object *obj) +{ + TpmLog *log = TPM_LOG(obj); + + log->digest_algo = TPM_LOG_DIGEST_ALGO_SHA256; +} + +static void tpm_log_destroy(TpmLog *log) +{ + if (!log->content) { + return; + } + g_free(log->digest); + log->digest = NULL; + g_byte_array_free(log->content, /* free_segment */ true); + log->content = NULL; +} + +static void tpm_log_finalize(Object *obj) +{ + tpm_log_destroy(TPM_LOG(obj)); +} + +static int tpm_log_get_digest_algo(Object *obj, Error **errp) +{ + TpmLog *log = TPM_LOG(obj); + + return log->digest_algo; +} + +static void tpm_log_set_digest_algo(Object *obj, int algo, Error **errp) +{ + TpmLog *log = TPM_LOG(obj); + + if (log->content != NULL) { + error_setg(errp, "cannot set digest algo after log creation"); + return; + } + + log->digest_algo = algo; +} + +static void tpm_log_get_max_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + TpmLog *log = TPM_LOG(obj); + uint64_t value = log->max_size; + + visit_type_uint64(v, name, &value, errp); +} + +static void tpm_log_get_load_addr(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + TpmLog *log = TPM_LOG(obj); + uint64_t value = log->load_addr; + + visit_type_uint64(v, name, &value, errp); +} + 
+static void tpm_log_set_load_addr(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + TpmLog *log = TPM_LOG(obj); + uint64_t value; + + if (!visit_type_uint64(v, name, &value, errp)) { + return; + } + + log->load_addr = value; +} + + +static void tpm_log_class_init(ObjectClass *oc, const void *data) +{ + object_class_property_add_enum(oc, "digest-algo", + "TpmLogDigestAlgo", + &TpmLogDigestAlgo_lookup, + tpm_log_get_digest_algo, + tpm_log_set_digest_algo); + object_class_property_set_description(oc, "digest-algo", + "Algorithm used to hash blobs added as events ('sha256', 'sha512')"); + + /* max_size is set while allocating the log in tpm_log_create */ + object_class_property_add(oc, "max-size", "uint64", tpm_log_get_max_size, + NULL, NULL, NULL); + object_class_property_set_description(oc, "max-size", + "Maximum size of the log, reserved in guest memory"); + + object_class_property_add(oc, "load-addr", "uint64", tpm_log_get_load_addr, + tpm_log_set_load_addr, NULL, NULL); + object_class_property_set_description(oc, "load-addr", + "Base address of the log in guest memory"); +} + +int tpm_log_create(TpmLog *log, size_t max_size, Error **errp) +{ + struct TcgEfiSpecIdEvent event; + struct TcgPcClientPcrEvent header = { + .pcr_index = 0, + .event_type = cpu_to_le32(TCG_EV_NO_ACTION), + .digest = {0}, + .event_data_size = cpu_to_le32(sizeof(event)), + }; + + log->content = g_byte_array_sized_new(max_size); + log->max_size = max_size; + + switch (log->digest_algo) { + case TPM_LOG_DIGEST_ALGO_SHA256: + log->tcg_algo = TCG_ALG_SHA256; + log->digest_size = TCG_ALG_SHA256_DIGEST_SIZE; + break; + case TPM_LOG_DIGEST_ALGO_SHA512: + log->tcg_algo = TCG_ALG_SHA512; + log->digest_size = TCG_ALG_SHA512_DIGEST_SIZE; + break; + default: + g_assert_not_reached(); + } + + log->digest = g_malloc0(log->digest_size); + + event = (struct TcgEfiSpecIdEvent) { + .signature = "Spec ID Event03", + .platform_class = 0, + .family_version_minor = 0, + 
.family_version_major = 2, + .spec_revision = 106, + .uintn_size = 2, /* UINT64 */ + .number_of_algorithms = cpu_to_le32(1), + .algorithm_id = cpu_to_le16(log->tcg_algo), + .digest_size = cpu_to_le16(log->digest_size), + .vendor_info_size = 0, + }; + + g_byte_array_append(log->content, (guint8 *)&header, sizeof(header)); + g_byte_array_append(log->content, (guint8 *)&event, sizeof(event)); + return 0; +} + +int tpm_log_add_event(TpmLog *log, uint32_t event_type, const uint8_t *event, + size_t event_size, const uint8_t *data, size_t data_size, + Error **errp) +{ + int digests = 0; + size_t rollback_len; + struct TcgPcrEvent2Head header = { + .pcr_index = 0, + .event_type = cpu_to_le32(event_type), + }; + struct TpmlDigestValues digest_header = {0}; + struct TcgPcrEvent2Tail tail = { + .event_size = cpu_to_le32(event_size), + }; + + if (log->content == NULL) { + error_setg(errp, "event log is not initialized"); + return -EINVAL; + } + rollback_len = log->content->len; + + g_byte_array_append(log->content, (guint8 *)&header, sizeof(header)); + + if (data) { + QCryptoHashAlgo qc_algo; + + digest_header.hash_alg = cpu_to_le16(log->tcg_algo); + switch (log->digest_algo) { + case TPM_LOG_DIGEST_ALGO_SHA256: + qc_algo = QCRYPTO_HASH_ALGO_SHA256; + break; + case TPM_LOG_DIGEST_ALGO_SHA512: + qc_algo = QCRYPTO_HASH_ALGO_SHA512; + break; + default: + g_assert_not_reached(); + } + if (qcrypto_hash_bytes(qc_algo, (const char *)data, data_size, + &log->digest, &log->digest_size, errp)) { + goto err_rollback; + } + digests = 1; + } else if (event_type == TCG_EV_NO_ACTION) { + /* EV_NO_ACTION contains empty digests for each supported algo */ + memset(log->digest, 0, log->digest_size); + digest_header.hash_alg = 0; + digests = 1; + } + + if (digests) { + digest_header.count = cpu_to_le32(digests); + g_byte_array_append(log->content, (guint8 *)&digest_header, + sizeof(digest_header)); + g_byte_array_append(log->content, log->digest, log->digest_size); + } else { + /* Add an empty 
digests list */ + g_byte_array_append(log->content, (guint8 *)&digest_header.count, + sizeof(digest_header.count)); + } + + g_byte_array_append(log->content, (guint8 *)&tail, sizeof(tail)); + g_byte_array_append(log->content, event, event_size); + + if (log->content->len > log->max_size) { + error_setg(errp, "event log exceeds max size"); + goto err_rollback; + } + + return 0; + +err_rollback: + g_byte_array_set_size(log->content, rollback_len); + return -1; +} + +int tpm_log_write_and_close(TpmLog *log, Error **errp) +{ + int ret; + + if (!log->content) { + error_setg(errp, "event log is not initialized"); + return -1; + } + + ret = address_space_write_rom(&address_space_memory, log->load_addr, + MEMTXATTRS_UNSPECIFIED, log->content->data, + log->content->len); + if (ret) { + error_setg(errp, "cannot load log into memory"); + return -1; + } + + tpm_log_destroy(log); + return ret; +} diff --git a/include/hw/tpm/tpm_log.h b/include/hw/tpm/tpm_log.h new file mode 100644 index 0000000000..3552b78dab --- /dev/null +++ b/include/hw/tpm/tpm_log.h @@ -0,0 +1,89 @@ +#ifndef QEMU_TPM_LOG_H +#define QEMU_TPM_LOG_H + +#include "qom/object.h" +#include "system/tpm.h" + +/* + * Defined in: TCG Algorithm Registry + * Family 2.0 Level 00 Revision 01.34 + * + * (Here TCG stands for Trusted Computing Group) + */ +#define TCG_ALG_SHA256 0xB +#define TCG_ALG_SHA512 0xD + +/* Size of a digest in bytes */ +#define TCG_ALG_SHA256_DIGEST_SIZE 32 +#define TCG_ALG_SHA512_DIGEST_SIZE 64 + +/* + * Defined in: TCG PC Client Platform Firmware Profile Specification + * Version 1.06 revision 52 + */ +#define TCG_EV_NO_ACTION 0x00000003 +#define TCG_EV_EVENT_TAG 0x00000006 +#define TCG_EV_POST_CODE2 0x00000013 +#define TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2 0x8000000A + +struct UefiPlatformFirmwareBlob2Head { + uint8_t blob_description_size; + uint8_t blob_description[]; +} __attribute__((packed)); + +struct UefiPlatformFirmwareBlob2Tail { + uint64_t blob_base; + uint64_t blob_size; +} 
__attribute__((packed)); + +#define TYPE_TPM_LOG "tpm-log" + +OBJECT_DECLARE_SIMPLE_TYPE(TpmLog, TPM_LOG) + +/** + * tpm_log_create - Create the event log + * @log: the log object + * @max_size: maximum size of the log. Adding an event past that size will + * return an error + * @errp: pointer to a NULL-initialized error object + * + * Allocate the event log and create the initial entry (Spec ID Event03) + * describing the log format. + * + * Returns: 0 on success, -1 on error + */ +int tpm_log_create(TpmLog *log, size_t max_size, Error **errp); + +/** + * tpm_log_add_event - Append an event to the log + * @log: the log object + * @event_type: the `eventType` field in TCG_PCR_EVENT2 + * @event: the `event` field in TCG_PCR_EVENT2 + * @event_size: the `eventSize` field in TCG_PCR_EVENT2 + * @data: content to be hashed into the event digest. May be NULL. + * @data_size: size of @data. Should be zero when @data is NULL. + * @errp: pointer to a NULL-initialized error object + * + * Add a TCG_PCR_EVENT2 event to the event log. Depending on the event type, a + * data buffer may be hashed into the event digest (for example + * TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2 contains a digest of the blob.) + * + * Returns: 0 on success, -1 on error + */ +int tpm_log_add_event(TpmLog *log, uint32_t event_type, const uint8_t *event, + size_t event_size, const uint8_t *data, size_t data_size, + Error **errp); + +/** + * tpm_log_write_and_close - Move the log to guest memory + * @log: the log object + * @errp: pointer to a NULL-initialized error object + * + * Write the log into memory, at the address set in the load-addr property. + * After this operation, the log is not writable anymore. 
+ * + * Return: 0 on success, -1 on error + */ +int tpm_log_write_and_close(TpmLog *log, Error **errp); + +#endif diff --git a/qapi/tpm.json b/qapi/tpm.json index 3f2850a573..e22b55f817 100644 --- a/qapi/tpm.json +++ b/qapi/tpm.json @@ -186,3 +186,17 @@ ## { 'command': 'query-tpm', 'returns': ['TPMInfo'], 'if': 'CONFIG_TPM' } + +## +# @TpmLogDigestAlgo: +# +# @sha256: Use the SHA256 algorithm +# +# @sha512: Use the SHA512 algorithm +# +# Algorithm to use for event log digests +# +# Since: 9.3 +## +{ 'enum': 'TpmLogDigestAlgo', + 'data': ['sha256', 'sha512'] } From 4ebea58952e95a011a4d3a1affa869f9ad3d4881 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 7 Nov 2024 14:03:34 +0000 Subject: [PATCH 29/35] NVIDIA: SAUCE: hw/core/loader: Add fields to RomLoaderNotify In order to write an event log, the ROM load notification handler needs two more fields. Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 8aec62494a9e7ef4c0206d9aa5e5a378098f09e5 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/core/loader.c | 2 ++ include/hw/loader.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/hw/core/loader.c b/hw/core/loader.c index d4c2a2ec13..0b504feed2 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -1259,6 +1259,8 @@ static void rom_reset(void *unused) trace_loader_write_rom(rom->name, rom->addr, rom->datasize, rom->isrom); notify = (RomLoaderNotifyData) { + .name = rom->name, + .blob_ptr = rom->data, .addr = rom->addr, .len = rom->datasize, }; diff --git a/include/hw/loader.h b/include/hw/loader.h index 1290419913..0345c0f6bd 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -340,6 +340,10 @@ ssize_t rom_add_vga(const char *file); ssize_t rom_add_option(const char *file, int32_t bootindex); typedef struct RomLoaderNotifyData { + /* Description of the loaded ROM */ + const char *name; + /* Blob */ + void *blob_ptr; /* Address of the blob in guest memory */ hwaddr addr; /* Length of the blob */ 
From bbb5769197ac1e124b1a458dffd1ec5f6f780e72 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 7 Nov 2024 17:38:11 +0000 Subject: [PATCH 30/35] NVIDIA: SAUCE: target/arm/kvm-rme: Add measurement log Create an event log in the format defined by Trusted Computing Group for TPM2. It contains information about the VMM, the Realm parameters, any data loaded into guest memory before boot, and the initial vCPU state. The guest can access this log from RAM and send it to a verifier, to help the verifier independently compute the Realm Initial Measurement, and check that the data we load into guest RAM is known-good images. Without this log, in order to end up with the right Measurement, the verifier needs to guess what is loaded, where and in what order. Cc: Stefan Berger Signed-off-by: Jean-Philippe Brucker (cherry picked from commit c6f47fcd33cf73d2ba55cb4f4f6eaaedd144b2b7 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- qapi/qom.json | 9 +- target/arm/Kconfig | 1 + target/arm/kvm-rme.c | 405 +++++++++++++++++++++++++++++++++++++++++- target/arm/kvm-stub.c | 5 + target/arm/kvm_arm.h | 10 ++ 5 files changed, 427 insertions(+), 3 deletions(-) diff --git a/qapi/qom.json b/qapi/qom.json index 5b423d5033..bd19b55d4d 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -1210,11 +1210,18 @@ # @measurement-algorithm: Realm measurement algorithm # (default: sha512) # +# @measurement-log: Enable a measurement log for the Realm. All events +# that contribute to the Realm Initial Measurement (RIM) are added +# to a log in TCG TPM2 format, which is itself loaded into Realm +# memory (unmeasured) and can then be read by a verifier to +# reconstruct the RIM. 
+# # Since: 10.0 ## { 'struct': 'RmeGuestProperties', 'data': { '*personalization-value': 'str', - '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm' } } + '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm', + '*measurement-log': 'bool'} } ## # @ObjectType: diff --git a/target/arm/Kconfig b/target/arm/Kconfig index 7f8a2217ae..ee3a2184d0 100644 --- a/target/arm/Kconfig +++ b/target/arm/Kconfig @@ -13,3 +13,4 @@ config AARCH64 select ARM # kvm_arch_fixup_msi_route() needs to access PCIDevice select PCI if KVM + select TPM_LOG if KVM diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c index 9978120007..91730ae2f1 100644 --- a/target/arm/kvm-rme.c +++ b/target/arm/kvm-rme.c @@ -10,11 +10,13 @@ #include "hw/core/cpu.h" #include "hw/loader.h" #include "hw/pci/pci.h" +#include "hw/tpm/tpm_log.h" #include "kvm_arm.h" #include "migration/blocker.h" #include "qapi/error.h" #include "qemu/base64.h" #include "qemu/error-report.h" +#include "qemu/units.h" #include "qom/object_interfaces.h" #include "system/confidential-guest-support.h" #include "system/kvm.h" @@ -25,6 +27,14 @@ OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST) #define RME_PAGE_SIZE qemu_real_host_page_size() +#define RME_MEASUREMENT_LOG_SIZE (64 * KiB) + +typedef struct RmeLogFiletype { + uint32_t event_type; + /* Description copied into the log event */ + const char *desc; +} RmeLogFiletype; + /* * Realms have a split guest-physical address space: the bottom half is private * to the realm, and the top half is shared with the host. 
Within QEMU, we use a @@ -57,6 +67,8 @@ typedef struct RealmRamDiscardListener { typedef struct { hwaddr base; hwaddr size; + uint8_t *blob_ptr; + RmeLogFiletype *filetype; } RmeRamRegion; struct RmeGuest { @@ -67,22 +79,335 @@ struct RmeGuest { char *personalization_value_str; uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE]; RmeGuestMeasurementAlgorithm measurement_algo; + bool use_measurement_log; RmeRamRegion init_ram; uint8_t ipa_bits; + size_t num_cpus; RealmDmaRegion *dma_region; QLIST_HEAD(, RealmRamDiscardListener) ram_discard_list; MemoryListener memory_listener; AddressSpace dma_as; + + TpmLog *log; + GHashTable *images; }; OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, CONFIDENTIAL_GUEST_SUPPORT, { TYPE_USER_CREATABLE }, { }) +typedef struct { + char signature[16]; + char name[32]; + char version[40]; + uint64_t ram_size; + uint32_t num_cpus; + uint64_t flags; +} EventLogVmmVersion; + +typedef struct { + uint32_t id; + uint32_t data_size; + uint8_t data[]; +} EventLogTagged; + +#define EVENT_LOG_TAG_REALM_CREATE 1 +#define EVENT_LOG_TAG_INIT_RIPAS 2 +#define EVENT_LOG_TAG_REC_CREATE 3 + +#define REALM_PARAMS_FLAG_SVE (1 << 1) +#define REALM_PARAMS_FLAG_PMU (1 << 2) + +#define REC_CREATE_FLAG_RUNNABLE (1 << 0) + static RmeGuest *rme_guest; +static int rme_init_measurement_log(MachineState *ms) +{ + Object *log; + gpointer filename; + TpmLogDigestAlgo algo; + RmeLogFiletype *filetype; + + if (!rme_guest->use_measurement_log) { + return 0; + } + + switch (rme_guest->measurement_algo) { + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256: + algo = TPM_LOG_DIGEST_ALGO_SHA256; + break; + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512: + algo = TPM_LOG_DIGEST_ALGO_SHA512; + break; + default: + g_assert_not_reached(); + } + + log = object_new_with_props(TYPE_TPM_LOG, OBJECT(rme_guest), + "log", &error_fatal, + "digest-algo", TpmLogDigestAlgo_str(algo), + NULL); + + tpm_log_create(TPM_LOG(log), RME_MEASUREMENT_LOG_SIZE, &error_fatal); + 
rme_guest->log = TPM_LOG(log); + + /* + * Write down the image names we're expecting to encounter when handling the + * ROM load notifications, so we can record the type of image being loaded + * to help the verifier. + */ + rme_guest->images = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + g_free); + + filename = g_strdup(ms->kernel_filename); + if (filename) { + filetype = g_new0(RmeLogFiletype, 1); + filetype->event_type = TCG_EV_POST_CODE2; + filetype->desc = "KERNEL"; + g_hash_table_insert(rme_guest->images, filename, (gpointer)filetype); + } + + filename = g_strdup(ms->initrd_filename); + if (filename) { + filetype = g_new0(RmeLogFiletype, 1); + filetype->event_type = TCG_EV_POST_CODE2; + filetype->desc = "INITRD"; + g_hash_table_insert(rme_guest->images, filename, (gpointer)filetype); + } + + filename = g_strdup(ms->firmware); + if (filename) { + filetype = g_new0(RmeLogFiletype, 1); + filetype->event_type = TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2; + filetype->desc = "FIRMWARE"; + g_hash_table_insert(rme_guest->images, filename, filetype); + } + + filename = g_strdup(ms->dtb); + if (!filename) { + filename = g_strdup("dtb"); + } + filetype = g_new0(RmeLogFiletype, 1); + filetype->event_type = TCG_EV_POST_CODE2; + filetype->desc = "DTB"; + g_hash_table_insert(rme_guest->images, filename, filetype); + + return 0; +} + +/* + * Append a TCG_EV_EVENT_TAG event carrying @id and @size bytes of @data to the + * measurement log. Does nothing and returns 0 when no log is active; + * otherwise returns the result of tpm_log_add_event(). + */ +static int rme_log_event_tag(uint32_t id, uint8_t *data, size_t size, + Error **errp) +{ + int ret; + EventLogTagged event = { + .id = id, + .data_size = size, + }; + GByteArray *bytes; + + if (!rme_guest->log) { + return 0; + } + + /* Allocate only past the early return, so a disabled log leaks nothing */ + bytes = g_byte_array_new(); + g_byte_array_append(bytes, (uint8_t *)&event, sizeof(event)); + g_byte_array_append(bytes, data, size); + ret = tpm_log_add_event(rme_guest->log, TCG_EV_EVENT_TAG, bytes->data, + bytes->len, NULL, 0, errp); + g_byte_array_free(bytes, true); + return ret; +} + +/* Log VM type and Realm Descriptor create */ +static int rme_log_realm_create(Error **errp) +{ + int ret; + ARMCPU *cpu; +
EventLogVmmVersion vmm_version = { + .signature = "VM VERSION", + .name = "QEMU", + .version = QEMU_VERSION, + .ram_size = cpu_to_le64(rme_guest->init_ram.size), + .num_cpus = cpu_to_le32(rme_guest->num_cpus), + .flags = 0, + }; + struct { + uint64_t flags; + uint8_t s2sz; + uint8_t sve_vl; + uint8_t num_bps; + uint8_t num_wps; + uint8_t pmu_num_ctrs; + uint8_t hash_algo; + } params = { + .s2sz = rme_guest->ipa_bits, + }; + + if (!rme_guest->log) { + return 0; + } + + ret = tpm_log_add_event(rme_guest->log, TCG_EV_NO_ACTION, + (uint8_t *)&vmm_version, sizeof(vmm_version), + NULL, 0, errp); + if (ret) { + return ret; + } + + /* With KVM all CPUs have the same capability */ + cpu = ARM_CPU(first_cpu); + if (cpu->has_pmu) { + params.flags |= REALM_PARAMS_FLAG_PMU; + params.pmu_num_ctrs = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N); + } + + if (cpu->sve_max_vq) { + params.flags |= REALM_PARAMS_FLAG_SVE; + params.sve_vl = cpu->sve_max_vq - 1; + } + params.num_bps = FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, BRPS); + params.num_wps = FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, WRPS); + + switch (rme_guest->measurement_algo) { + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256: + params.hash_algo = ARM_RME_CONFIG_HASH_ALGO_SHA256; + break; + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512: + params.hash_algo = ARM_RME_CONFIG_HASH_ALGO_SHA512; + break; + default: + g_assert_not_reached(); + } + + return rme_log_event_tag(EVENT_LOG_TAG_REALM_CREATE, (uint8_t *)&params, + sizeof(params), errp); +} + +/* + * Add an image event of type @filetype->event_type describing the guest range + * [@base, @base + @size) to the measurement log. Unmeasured images are logged + * with @data == NULL. Returns 0 when no log is active, -1 with @errp set when + * @filetype is NULL, otherwise the result of tpm_log_add_event(). + */ +static int rme_log_image(RmeLogFiletype *filetype, uint8_t *data, hwaddr base, + size_t size, Error **errp) +{ + int ret; + size_t desc_size; + GByteArray *event; + struct UefiPlatformFirmwareBlob2Head head = {0}; + struct UefiPlatformFirmwareBlob2Tail tail = {0}; + + if (!rme_guest->log) { + return 0; + } + + if (!filetype) { + error_setg(errp, "cannot log image without a filetype"); + return -1; + } + + /* Allocated after the early returns so that none of them leaks it */ + event = g_byte_array_new(); + + /*
EV_POST_CODE2 strings are not NUL-terminated */ + desc_size = strlen(filetype->desc); + head.blob_description_size = desc_size; + tail.blob_base = cpu_to_le64(base); + tail.blob_size = cpu_to_le64(size); + + g_byte_array_append(event, (guint8 *)&head, sizeof(head)); + g_byte_array_append(event, (guint8 *)filetype->desc, desc_size); + g_byte_array_append(event, (guint8 *)&tail, sizeof(tail)); + + ret = tpm_log_add_event(rme_guest->log, filetype->event_type, event->data, + event->len, data, size, errp); + g_byte_array_free(event, true); + return ret; +} + +static int rme_log_ripas(hwaddr base, size_t size, Error **errp) +{ + struct { + uint64_t base; + uint64_t size; + } init_ripas = { + .base = cpu_to_le64(base), + .size = cpu_to_le64(size), + }; + + return rme_log_event_tag(EVENT_LOG_TAG_INIT_RIPAS, (uint8_t *)&init_ripas, + sizeof(init_ripas), errp); +} + +static int rme_log_rec(uint64_t flags, uint64_t pc, uint64_t gprs[8], Error **errp) +{ + struct { + uint64_t flags; + uint64_t pc; + uint64_t gprs[8]; + } rec_create = { + .flags = cpu_to_le64(flags), + .pc = cpu_to_le64(pc), + .gprs[0] = cpu_to_le64(gprs[0]), + .gprs[1] = cpu_to_le64(gprs[1]), + .gprs[2] = cpu_to_le64(gprs[2]), + .gprs[3] = cpu_to_le64(gprs[3]), + .gprs[4] = cpu_to_le64(gprs[4]), + .gprs[5] = cpu_to_le64(gprs[5]), + .gprs[6] = cpu_to_le64(gprs[6]), + .gprs[7] = cpu_to_le64(gprs[7]), + }; + + return rme_log_event_tag(EVENT_LOG_TAG_REC_CREATE, (uint8_t *)&rec_create, + sizeof(rec_create), errp); +} + +static int rme_populate_range(hwaddr base, size_t size, bool measure, + Error **errp); + +/* + * Finalize the measurement log: record the log region itself as an unmeasured + * "LOG" image, call tpm_log_write_and_close(), populate the log range in the + * Realm without measuring it, then release the image table and the log object. + * Returns 0 on success or when no log is active, non-zero on error. + */ +static int rme_close_measurement_log(Error **errp) +{ + /* Makes the *errp checks below safe when the caller passes NULL */ + ERRP_GUARD(); + int ret; + hwaddr base; + size_t size; + RmeLogFiletype filetype = { + .event_type = TCG_EV_POST_CODE2, + .desc = "LOG", + }; + + if (!rme_guest->log) { + return 0; + } + + base = object_property_get_uint(OBJECT(rme_guest->log), "load-addr", errp); + if (*errp) { + return -1; + } + + size = object_property_get_uint(OBJECT(rme_guest->log), "max-size",
errp); + if (*errp) { + return -1; + } + + /* Log the log itself */ + ret = rme_log_image(&filetype, NULL, base, size, errp); + if (ret) { + return ret; + } + + ret = tpm_log_write_and_close(rme_guest->log, errp); + if (ret) { + return ret; + } + + ret = rme_populate_range(base, size, /* measure */ false, errp); + if (ret) { + return ret; + } + + /* + * Clear the pointer as well: rme_rom_load_notify() tests + * rme_guest->images before looking entries up in it. + */ + g_clear_pointer(&rme_guest->images, g_hash_table_destroy); + + /* The log is now in the guest. Free this object */ + object_unparent(OBJECT(rme_guest->log)); + rme_guest->log = NULL; + return 0; +} + static int rme_configure_one(RmeGuest *guest, uint32_t cfg, Error **errp) { int ret; @@ -156,9 +481,10 @@ static int rme_init_ram(RmeRamRegion *ram, Error **errp) error_setg_errno(errp, -ret, "failed to init RAM [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")", start, end); + return ret; } - return ret; + return rme_log_ripas(ram->base, ram->size, errp); } static int rme_populate_range(hwaddr base, size_t size, bool measure, @@ -194,23 +520,42 @@ static void rme_populate_ram_region(gpointer data, gpointer err) } rme_populate_range(region->base, region->size, /* measure */ true, errp); + if (*errp) { + return; + } + + rme_log_image(region->filetype, region->blob_ptr, region->base, + region->size, errp); } static int rme_init_cpus(Error **errp) { int ret; CPUState *cs; + bool logged_primary_cpu = false; /* * Now that do_cpu_reset() initialized the boot PC and * kvm_cpu_synchronize_post_reset() registered it, we can finalize the REC.
*/ CPU_FOREACH(cs) { - ret = kvm_arm_vcpu_finalize(ARM_CPU(cs), KVM_ARM_VCPU_REC); + ARMCPU *cpu = ARM_CPU(cs); + + ret = kvm_arm_vcpu_finalize(cpu, KVM_ARM_VCPU_REC); if (ret) { error_setg_errno(errp, -ret, "failed to finalize vCPU"); return ret; } + + if (!logged_primary_cpu) { + ret = rme_log_rec(REC_CREATE_FLAG_RUNNABLE, cpu->env.pc, + cpu->env.xregs, errp); + if (ret) { + return ret; + } + + logged_primary_cpu = true; + } } return 0; } @@ -230,6 +575,10 @@ static int rme_create_realm(Error **errp) return -1; } + if (rme_log_realm_create(errp)) { + return -1; + } + if (rme_init_ram(&rme_guest->init_ram, errp)) { return -1; } @@ -244,6 +593,10 @@ static int rme_create_realm(Error **errp) return -1; } + if (rme_close_measurement_log(errp)) { + return -1; + } + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, KVM_CAP_ARM_RME_ACTIVATE_REALM); if (ret) { @@ -313,6 +666,20 @@ static void rme_set_measurement_algo(Object *obj, int algo, Error **errp) guest->measurement_algo = algo; } +static bool rme_get_measurement_log(Object *obj, Error **errp) +{ + RmeGuest *guest = RME_GUEST(obj); + + return guest->use_measurement_log; +} + +static void rme_set_measurement_log(Object *obj, bool value, Error **errp) +{ + RmeGuest *guest = RME_GUEST(obj); + + guest->use_measurement_log = value; +} + static void rme_guest_class_init(ObjectClass *oc, const void *data) { object_class_property_add_str(oc, "personalization-value", rme_get_rpv, @@ -327,6 +694,12 @@ static void rme_guest_class_init(ObjectClass *oc, const void *data) rme_set_measurement_algo); object_class_property_set_description(oc, "measurement-algorithm", "Realm measurement algorithm ('sha256', 'sha512')"); + + object_class_property_add_bool(oc, "measurement-log", + rme_get_measurement_log, + rme_set_measurement_log); + object_class_property_set_description(oc, "measurement-log", + "Enable/disable Realm measurement log"); } static void rme_guest_init(Object *obj) @@ -370,6 +743,20 @@ static void 
rme_rom_load_notify(Notifier *notifier, void *data) region = g_new0(RmeRamRegion, 1); region->base = rom->addr; region->size = rom->len; + /* + * TODO: double-check lifetime. Is the data still available when we measure + * it, while writing the log? Should be fine since data is kept for the next + * reset. + */ + region->blob_ptr = rom->blob_ptr; + + /* + * rme_guest->images is destroyed after ram_regions, so we can store + * filetype even if we don't own the struct. + */ + if (rme_guest->images) { + region->filetype = g_hash_table_lookup(rme_guest->images, rom->name); + } /* * The Realm Initial Measurement (RIM) depends on the order in which we @@ -399,6 +786,12 @@ int kvm_arm_rme_init(MachineState *ms) return -ENODEV; } + if (rme_init_measurement_log(ms)) { + return -ENODEV; + } + + rme_guest->num_cpus = ms->smp.max_cpus; + error_setg(&rme_mig_blocker, "RME: migration is not implemented"); migrate_add_blocker(&rme_mig_blocker, &error_fatal); @@ -632,3 +1025,11 @@ static void realm_dma_region_class_init(ObjectClass *oc, const void *data) imrc->translate = realm_dma_region_translate; imrc->replay = realm_dma_region_replay; } + +Object *kvm_arm_rme_get_measurement_log(void) +{ + if (rme_guest && rme_guest->log) { + return OBJECT(rme_guest->log); + } + return NULL; +} diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c index f57b26f679..fced12f91f 100644 --- a/target/arm/kvm-stub.c +++ b/target/arm/kvm-stub.c @@ -61,6 +61,11 @@ void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, { } +Object *kvm_arm_rme_get_measurement_log(void) +{ + return NULL; +} + /* * These functions should never actually be called without KVM support.
*/ diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 25ac4e33db..7c737fd35a 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -320,4 +320,14 @@ void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size); */ void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus); +/** + * kvm_arm_rme_get_measurement_log + * + * Obtain the measurement log object if enabled, in order to get its size and + * set its base address. + * + * Returns NULL if measurement log is disabled. + */ +Object *kvm_arm_rme_get_measurement_log(void); + #endif From 27d86f99ccd802cb06f91357d7dc8739a782adb6 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 7 Nov 2024 17:42:02 +0000 Subject: [PATCH 31/35] NVIDIA: SAUCE: hw/arm/virt: Add measurement log for confidential boot Create a measurement log describing operations performed by QEMU to initialize the guest, and load it into guest memory above the DTB. Cc: Stefan Berger Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 3d45ff0ff24f4713c09d6355cc39689cf46b329a https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/arm/boot.c | 47 +++++++++++++++++++++++++++++++++++++++++++ hw/arm/virt.c | 22 ++++++++++++++++++++ include/hw/arm/boot.h | 3 +++ include/hw/arm/virt.h | 1 + 4 files changed, 73 insertions(+) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 5cf28945de..417c835ebe 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -780,6 +780,24 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, fdt_add_psci_node(fdt, cpu); + /* Add a reserved-memory node for the event log */ + if (binfo->log_size) { + char *nodename; + + qemu_fdt_add_subnode(fdt, "/reserved-memory"); + qemu_fdt_setprop_cell(fdt, "/reserved-memory", "#address-cells", 0x2); + qemu_fdt_setprop_cell(fdt, "/reserved-memory", "#size-cells", 0x2); + qemu_fdt_setprop(fdt, "/reserved-memory", "ranges", NULL, 0); + + nodename = g_strdup_printf("/reserved-memory/event-log@%" PRIx64, + 
binfo->log_paddr); + qemu_fdt_add_subnode(fdt, nodename); + qemu_fdt_setprop_string(fdt, nodename, "compatible", "cc-event-log"); + qemu_fdt_setprop_sized_cells(fdt, nodename, "reg", 2, binfo->log_paddr, + 2, binfo->log_size); + g_free(nodename); + } + if (binfo->modify_dtb) { binfo->modify_dtb(binfo, fdt); } @@ -1056,6 +1074,30 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, return kernel_size; } +static void add_event_log(struct arm_boot_info *info) +{ + if (!info->log_size) { + return; + } + + if (!info->dtb_limit) { + int dtb_size = 0; + + if (!info->get_dtb(info, &dtb_size) || dtb_size == 0) { + error_report("Board does not have a DTB"); + exit(1); + } + info->dtb_limit = info->dtb_start + dtb_size; + } + + info->log_paddr = info->dtb_limit; + if (info->log_paddr + info->log_size > + info->loader_start + info->ram_size) { + error_report("Not enough space for measurement log and DTB"); + exit(1); + } +} + static void arm_setup_direct_kernel_boot(ARMCPU *cpu, struct arm_boot_info *info) { @@ -1103,6 +1145,7 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, } info->dtb_start = info->loader_start; info->dtb_limit = image_low_addr; + add_event_log(info); } } entry = elf_entry; @@ -1241,6 +1284,8 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, error_report("Not enough space for DTB after kernel/initrd"); exit(1); } + add_event_log(info); + fixupcontext[FIXUP_ARGPTR_LO] = info->dtb_start; fixupcontext[FIXUP_ARGPTR_HI] = info->dtb_start >> 32; } else { @@ -1302,6 +1347,8 @@ static void arm_setup_confidential_firmware_boot(ARMCPU *cpu, error_report("could not load firmware '%s'", firmware_filename); exit(1); } + + add_event_log(info); } static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 0203f12a3a..c1eb6aa796 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1938,6 +1938,11 @@ void virt_machine_done(Notifier *notifier, void *data) exit(1); } + if 
(vms->event_log) { + object_property_set_uint(vms->event_log, "load-addr", + vms->bootinfo.log_paddr, &error_fatal); + } + pci_bus_add_fw_cfg_extra_pci_roots(vms->fw_cfg, vms->bus, &error_abort); @@ -2283,6 +2288,21 @@ static void virt_post_cpus_gic_realized(VirtMachineState *vms, } } +static void create_measurement_log(VirtMachineState *vms) +{ + Error *err = NULL; + + vms->event_log = kvm_arm_rme_get_measurement_log(); + if (vms->event_log == NULL) { + return; + } + vms->bootinfo.log_size = object_property_get_uint(vms->event_log, + "max-size", &err); + if (err != NULL) { + error_report_err(err); + } +} + static void machvirt_init(MachineState *machine) { VirtMachineState *vms = VIRT_MACHINE(machine); @@ -2640,6 +2660,8 @@ static void machvirt_init(MachineState *machine) kvm_arm_rme_init_gpa_space(vms->highest_gpa, vms->bus); + create_measurement_log(vms); + vms->bootinfo.ram_size = machine->ram_size; vms->bootinfo.board_id = -1; vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base; diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h index 6a07dff99d..013db6cd97 100644 --- a/include/hw/arm/boot.h +++ b/include/hw/arm/boot.h @@ -150,6 +150,9 @@ struct arm_boot_info { * Confidential guest boot loads everything into RAM so it can be measured. */ bool confidential; + /* measurement log location in guest memory */ + hwaddr log_paddr; + size_t log_size; }; /** diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 20411e40ba..f50deda23f 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -182,6 +182,7 @@ struct VirtMachineState { CXLState cxl_devices_state; bool legacy_smmuv3_present; bool pci_preserve_config; + Object *event_log; }; #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) From 12264fe21c056502d9279675be6efeb499e3ddb6 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 21 Jul 2025 18:12:53 +0100 Subject: [PATCH 32/35] NVIDIA: SAUCE: HACK: hw/virtio/virtio-mem: Support guest-memfd-based RAM When plugging memory into a confidential guest, it starts as guest-private memory and doesn't need mappings in VFIO or vhost devices. When the guest decides to share this memory, the RAM discard listeners are notified. When the memory is unplugged, the guest first unshares the memory if necessary, again notifying the RAM discard listeners. So we don't need to install our own RAM discard listener. Signed-off-by: Jean-Philippe Brucker (cherry picked from commit 705dfb84a8ee44042d8682ce2e56289ea8344b81 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- hw/virtio/virtio-mem.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index c46f6f9c3e..9bd7c6f462 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -1050,8 +1050,13 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) /* * Set ourselves as RamDiscardManager before the plug handler maps the * memory region and exposes it via an address space. + * + * Guest memfd takes precedence over our ram discard manager, because it + * notifies listeners of guest switching memory between shared and private, + * which also happens to hotplugged memory. 
*/ - if (memory_region_set_ram_discard_manager(&vmem->memdev->mr, + if (!memory_region_has_guest_memfd(&vmem->memdev->mr) && + memory_region_set_ram_discard_manager(&vmem->memdev->mr, RAM_DISCARD_MANAGER(vmem))) { error_setg(errp, "Failed to set RamDiscardManager"); ram_block_coordinated_discard_require(false); @@ -1072,7 +1077,9 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); if (ret) { error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); - memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); + if (!memory_region_has_guest_memfd(&vmem->memdev->mr)) { + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); + } ram_block_coordinated_discard_require(false); return; } @@ -1157,7 +1164,9 @@ static void virtio_mem_device_unrealize(DeviceState *dev) * The unplug handler unmapped the memory region, it cannot be * found via an address space anymore. Unset ourselves. */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); + if (!memory_region_has_guest_memfd(&vmem->memdev->mr)) { + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); + } ram_block_coordinated_discard_require(false); } From b1a40d5840e1c31ee97e212c53ba169f08ddd155 Mon Sep 17 00:00:00 2001 From: Kazuhiro Abe Date: Thu, 3 Jul 2025 21:08:44 +0900 Subject: [PATCH 33/35] NVIDIA: SAUCE: target/arm/aarch64-qmp-cmds: Add Arm CCA support Add a QMP command to retrieve CCA capabilities; supported on aarch64 platforms only. 
Signed-off-by: Kazuhiro Abe (cherry picked from commit ce93295d18cede59d0207b52eadf8c4962755674 https://git.codelinaro.org/linaro/dcap/qemu.git) Signed-off-by: Ian May --- qapi/misc-arm.json | 46 ++++++++++++++++++++++++++++++++ stubs/meson.build | 1 + stubs/monitor-aarch64-cca.c | 12 +++++++++ target/arm/aarch64-qmp-cmds.c | 49 +++++++++++++++++++++++++++++++++++ target/arm/meson.build | 2 ++ 5 files changed, 110 insertions(+) create mode 100644 stubs/monitor-aarch64-cca.c create mode 100644 target/arm/aarch64-qmp-cmds.c diff --git a/qapi/misc-arm.json b/qapi/misc-arm.json index f921d740f1..fceab15a5d 100644 --- a/qapi/misc-arm.json +++ b/qapi/misc-arm.json @@ -45,3 +45,49 @@ # { "version": 3, "emulated": false, "kernel": true } ] } ## { 'command': 'query-gic-capabilities', 'returns': ['GICCapability'] } + +## +# @CcaMeasurementAlgo: +# +# Describes a measurement algorithm supported by Arm CCA. +# +# @measurement-algo: measurement algorithm +# +# Since: 10.1 - Provisional +# This version is subject to change. +## +{ 'struct': 'CcaMeasurementAlgo', + 'data': { 'measurement-algo': 'str' } } + +## +# @CcaCapability: +# +# Describes the capabilities of Arm CCA. +# +# @sections: list of supported measurement algorithms +# +# Since: 10.1 - Provisional +# This version is subject to change. +## +{ 'struct': 'CcaCapability', + 'data': { 'sections': ['CcaMeasurementAlgo'] } } + +## +# @query-cca-capabilities: +# +# This command is used to get the CCA capabilities, and is supported +# on ARM64 platforms only. +# +# Returns: a CcaCapability object. +# +# Since: 10.1 - Provisional +# This version is subject to change. +# +# ..
qmp-example:: +# +# -> { "execute": "query-cca-capabilities" } +# <- { "return": { "sections": [{"measurement-algo": "sha256"}, +# {"measurement-algo": "sha512"}]} } +## +{ 'command': 'query-cca-capabilities', 'returns': 'CcaCapability' } + diff --git a/stubs/meson.build b/stubs/meson.build index cef046e685..63a75c7ae8 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -78,6 +78,7 @@ if have_system stub_ss.add(files('win32-kbd-hook.c')) stub_ss.add(files('xen-hw-stub.c')) stub_ss.add(files('monitor-arm-gic.c')) + stub_ss.add(files('monitor-aarch64-cca.c')) stub_ss.add(files('monitor-i386-rtc.c')) stub_ss.add(files('monitor-i386-sev.c')) stub_ss.add(files('monitor-i386-sgx.c')) diff --git a/stubs/monitor-aarch64-cca.c b/stubs/monitor-aarch64-cca.c new file mode 100644 index 0000000000..e14dab30da --- /dev/null +++ b/stubs/monitor-aarch64-cca.c @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-arm.h" + + +CcaCapability *qmp_query_cca_capabilities(Error **errp) +{ + error_setg(errp, "ARM CCA is not available on this target"); + return NULL; +} diff --git a/target/arm/aarch64-qmp-cmds.c b/target/arm/aarch64-qmp-cmds.c new file mode 100644 index 0000000000..10e35e163d --- /dev/null +++ b/target/arm/aarch64-qmp-cmds.c @@ -0,0 +1,49 @@ +/* + * Support QMP command for AARCH64 + * + */ + +#include "qemu/osdep.h" +#include "kvm_arm.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-arm.h" +#include "system/kvm.h" + +/* + * Build the list of measurement algorithms ("sha256", "sha512") returned by + * query-cca-capabilities. Each name is a heap copy made with g_strdup(). + */ +static CcaMeasurementAlgoList *cca_measurement_algo_sections(void) +{ + CcaMeasurementAlgoList *head = NULL, **tail = &head; + CcaMeasurementAlgo *malgo; + + malgo = g_new0(CcaMeasurementAlgo, 1); + malgo->measurement_algo = g_strdup("sha256"); + QAPI_LIST_APPEND(tail, malgo); + + malgo = g_new0(CcaMeasurementAlgo, 1); +
malgo->measurement_algo = g_strdup("sha512"); + QAPI_LIST_APPEND(tail, malgo); + + return head; +} + +CcaCapability *qmp_query_cca_capabilities(Error **errp) +{ + CcaCapability *info = NULL; + + if (!kvm_enabled()) { + error_setg(errp, "KVM not enabled"); + return NULL; + } + + if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_RME)) { + error_setg(errp, "RME is not enabled in KVM"); + return NULL; + } + + info = g_new0(CcaCapability, 1); + info->sections = cca_measurement_algo_sections(); + + return info; +} + diff --git a/target/arm/meson.build b/target/arm/meson.build index 6eab1c0385..18672d976f 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -13,6 +13,8 @@ arm_common_system_ss = ss.source_set() arm_system_ss.add(files( 'arm-qmp-cmds.c', )) +arm_system_ss.add(when: ['TARGET_AARCH64', 'CONFIG_KVM'], + if_true: files('aarch64-qmp-cmds.c')) arm_system_ss.add(when: 'CONFIG_KVM', if_true: files( 'hyp_gdbstub.c', From 23be9f122a2a6668dc506c2dff5cb67095b26d80 Mon Sep 17 00:00:00 2001 From: Ian May Date: Mon, 6 Oct 2025 14:58:41 -0700 Subject: [PATCH 34/35] NVIDIA: SAUCE: vfio/pci: Register BDF with kernel RME-DA requires knowledge of the virtual BDF that is assigned to an RME capable device. Register the BDF with kernel: - If the device is attached to the root-bus, register at the time the realm is activated. - If the device is attached to a bridge, attempt registration when the config space is accessed. If the upstream device has the bus set and the BDF hasn't yet been registered, register the device.
Signed-off-by: Arto Merilainen Signed-off-by: Ian May --- hw/vfio/pci.c | 48 ++++++++++++++++++++++++++++++++++++++ hw/vfio/pci.h | 3 +++ linux-headers/linux/vfio.h | 14 +++++++++++ 3 files changed, 65 insertions(+) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 8da9ab286b..81cd262e72 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -56,6 +56,7 @@ static KVMRouteChange vfio_route_change; static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); +static void vfio_register_bdf(PCIDevice *pci_dev); /* Create new or reuse existing eventfd */ static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e, @@ -1379,6 +1380,9 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) VFIODevice *vbasedev = &vdev->vbasedev; uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val; + /* Attempt registering device info to kernel. No-op if done already */ + vfio_register_bdf(pdev); + memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); emu_bits = le32_to_cpu(emu_bits); @@ -1416,6 +1420,9 @@ void vfio_pci_write_config(PCIDevice *pdev, trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len); + /* Attempt registering device info to kernel. 
No-op if done already */ + vfio_register_bdf(pdev); + /* Write everything to VFIO, let it filter out what we can't write */ ret = vfio_pci_config_space_write(vdev, addr, len, &val_le); if (ret != len) { @@ -3030,6 +3037,7 @@ bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp) void vfio_pci_put_device(VFIOPCIDevice *vdev) { + qemu_del_vm_change_state_handler(vdev->vmstate); vfio_display_finalize(vdev); vfio_bars_finalize(vdev); vfio_cpr_pci_unregister_device(vdev); @@ -3613,6 +3621,40 @@ static void vfio_pci_reset(DeviceState *dev) vfio_pci_post_reset(vdev); } +static void vfio_register_bdf(PCIDevice *pci_dev) +{ + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pci_dev); + PCIBus *bus = pci_get_bus(pci_dev); + struct vfio_dev_info dev_info = { + .argsz = sizeof(dev_info), + .dev_num = (0ULL << 32) | (((uint64_t)pci_get_bdf(pci_dev)) << 8) + }; + + /* Info already set or bus identifier is not set. Skip */ + if (vdev->has_info_set || + !vdev->is_running || + (!pci_bus_is_root(bus) && + (pci_bus_num(bus) == 0))) + return; + + vdev->has_info_set = true; + + (void)ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_DEV_INFO, &dev_info); +} + +static void vfio_register_bdf_notifier(void *opaque, bool running, RunState state) +{ + VFIOPCIDevice *vdev = VFIO_PCI_BASE(opaque); + + if (!running) { + return; + } + + vdev->is_running = true; + + vfio_register_bdf(opaque); +} + static void vfio_instance_init(Object *obj) { PCIDevice *pci_dev = PCI_DEVICE(obj); @@ -3627,6 +3669,9 @@ static void vfio_instance_init(Object *obj) vdev->host.slot = ~0U; vdev->host.function = ~0U; + vdev->is_running = false; + vdev->has_info_set = false; + vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops, DEVICE(vdev), false); @@ -3642,6 +3687,9 @@ static void vfio_instance_init(Object *obj) * may be lost. 
*/ pci_dev->cap_present |= QEMU_PCI_SKIP_RESET_ON_CPR; + + vdev->vmstate = + qemu_add_vm_change_state_handler_prio(vfio_register_bdf_notifier, obj, 10); } static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data) diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 810a842f4a..827454af67 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -186,9 +186,12 @@ struct VFIOPCIDevice { bool defer_kvm_irq_routing; bool clear_parent_atomics_on_exit; bool skip_vsc_check; + bool has_info_set; + bool is_running; VFIODisplay *dpy; Notifier irqchip_change_notifier; VFIOPCICPR cpr; + VMChangeStateEntry *vmstate; }; /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */ diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 79bf8c0cc5..3e715cbe2e 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -1831,6 +1831,20 @@ struct vfio_iommu_spapr_tce_remove { }; #define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) +/** + * VFIO_DEVICE_SET_DEV_INFO - _IOW(VFIO_TYPE, VFIO_BASE + 22, struct vfio_dev_info) + * + * Sets information related to the VM that needs to be passed to the hypervisor + * for establishing mapping between the hardware and the identifiers used by the + * guest. 
+ */ +struct vfio_dev_info { + __u32 argsz; + __u32 reserved; + __u64 dev_num; +}; +#define VFIO_DEVICE_SET_DEV_INFO _IO(VFIO_TYPE, VFIO_BASE + 22) + /* ***************************************************************** */ #endif /* VFIO_H */ From 9aff54402d090eecbd088ac065f742e1bfc9ef17 Mon Sep 17 00:00:00 2001 From: Ian May Date: Mon, 6 Oct 2025 15:23:44 -0700 Subject: [PATCH 35/35] NVIDIA: SAUCE: hw/arm/virt: Define MEC as shared or private Signed-off-by: Arto Merilainen Signed-off-by: Ian May --- linux-headers/asm-arm64/kvm.h | 5 +++++ qapi/qom.json | 5 ++++- target/arm/kvm-rme.c | 26 ++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 91ab0f2763..c969e7fc51 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -428,6 +428,7 @@ enum { /* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ #define ARM_RME_CONFIG_RPV 0 #define ARM_RME_CONFIG_HASH_ALGO 1 +#define ARM_RME_CONFIG_MEC 2 #define ARM_RME_CONFIG_HASH_ALGO_SHA256 0 #define ARM_RME_CONFIG_HASH_ALGO_SHA512 1 @@ -447,6 +448,10 @@ struct arm_rme_config { __u32 hash_algo; }; + /* cfg == ARM_RME_CONFIG_MEC */ + struct { + __u32 shared_mec; + }; /* Fix the size of the union */ __u8 reserved[256]; }; diff --git a/qapi/qom.json b/qapi/qom.json index bd19b55d4d..e731d0d545 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -1216,12 +1216,15 @@ # memory (unmeasured) and can then be read by a verifier to # reconstruct the RIM. # +# @shared-mec: Enable/disable usage of a shared MEC. 
+# # Since: 10.0 ## { 'struct': 'RmeGuestProperties', 'data': { '*personalization-value': 'str', '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm', - '*measurement-log': 'bool'} } + '*measurement-log': 'bool', + '*shared-mec': 'bool'} } ## # @ObjectType: diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c index 91730ae2f1..ef1371acc5 100644 --- a/target/arm/kvm-rme.c +++ b/target/arm/kvm-rme.c @@ -80,6 +80,7 @@ struct RmeGuest { uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE]; RmeGuestMeasurementAlgorithm measurement_algo; bool use_measurement_log; + bool use_shared_mec; RmeRamRegion init_ram; uint8_t ipa_bits; @@ -434,6 +435,10 @@ static int rme_configure_one(RmeGuest *guest, uint32_t cfg, Error **errp) } cfg_str = "hash algorithm"; break; + case ARM_RME_CONFIG_MEC: + args.shared_mec = guest->use_shared_mec; + cfg_str = "shared MEC"; + break; default: g_assert_not_reached(); } @@ -453,6 +458,7 @@ static int rme_configure(Error **errp) const uint32_t config_options[] = { ARM_RME_CONFIG_RPV, ARM_RME_CONFIG_HASH_ALGO, + ARM_RME_CONFIG_MEC, }; for (option = 0; option < ARRAY_SIZE(config_options); option++) { @@ -680,6 +686,20 @@ static void rme_set_measurement_log(Object *obj, bool value, Error **errp) guest->use_measurement_log = value; } +static bool rme_get_shared_mec(Object *obj, Error **errp) +{ + RmeGuest *guest = RME_GUEST(obj); + + return guest->use_shared_mec; +} + +static void rme_set_shared_mec(Object *obj, bool value, Error **errp) +{ + RmeGuest *guest = RME_GUEST(obj); + + guest->use_shared_mec = value; +} + static void rme_guest_class_init(ObjectClass *oc, const void *data) { object_class_property_add_str(oc, "personalization-value", rme_get_rpv, @@ -700,6 +720,12 @@ static void rme_guest_class_init(ObjectClass *oc, const void *data) rme_set_measurement_log); object_class_property_set_description(oc, "measurement-log", "Enable/disable Realm measurement log"); + + object_class_property_add_bool(oc, "shared-mec", + rme_get_shared_mec, + rme_set_shared_mec); + 
object_class_property_set_description(oc, "shared-mec", + "Enable/disable usage of a shared Memory Encryption Context (MEC)"); } static void rme_guest_init(Object *obj)