diff --git a/.last_merge b/.last_merge index e5b78812092f..67bcf88fc09a 100644 --- a/.last_merge +++ b/.last_merge @@ -1 +1 @@ -freebsd-main-20240816 +freebsd-main-20240823 diff --git a/Makefile.inc1 b/Makefile.inc1 index d398da604523..fc3e7f669c2b 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -2135,6 +2135,10 @@ REPODIR?= ${OBJROOT}repo PKG_FORMAT?= tzst PKG_REPO_SIGNING_KEY?= # empty PKG_OUTPUT_DIR?= ${PKG_VERSION} +PKG_ABI_FILE?= ${WSTAGEDIR}/usr/bin/uname +.if make(create-*-packages*) || make(real-update-packages) || make(real-sign-packages) +PKG_ABI!= ${PKG_CMD} -o ABI_FILE=${PKG_ABI_FILE} config ABI +.endif .ORDER: stage-packages create-packages .ORDER: create-packages create-world-packages @@ -2147,12 +2151,6 @@ _pkgbootstrap: .PHONY @env ASSUME_ALWAYS_YES=YES pkg bootstrap .endif -# -# Determine PKG_ABI from newvers.sh if not already set. -# -.if !defined(PKG_ABI) && (make(create-world-packages-jobs) || make(create-kernel-packages*) || make(real-update-packages) || make (create-source-packages) || make(sign-packages)) -PKG_ABI=${_TYPE}:${MAJOR_REVISION}:${TARGET_ARCH} -.endif PKG_BIN_VERSION!=${PKG_CMD} --version </dev/null 2>/dev/null |\ awk -F.
'/^[0-9.]+$$/ {print $$1 * 10000 + $$2 * 100 + $$3}' .if ${PKG_BIN_VERSION} < 11700 @@ -2162,8 +2160,7 @@ PKG_EXT= pkg .endif .if !defined(PKG_VERSION_FROM) && make(real-update-packages) -.if defined(PKG_ABI) -.if exists(${REPODIR}/${PKG_ABI}) +.if exists(${PKG_ABI_FILE}) && exists(${REPODIR}/${PKG_ABI}) PKG_VERSION_FROM!=/usr/bin/readlink ${REPODIR}/${PKG_ABI}/latest PKG_VERSION_FROM_DIR= ${REPODIR}/${PKG_ABI}/${PKG_VERSION_FROM} BRANCH_EXT_FROM= ${PKG_VERSION_FROM:C/.*([[:alpha:]][^\.]*).*/\1/} @@ -2173,7 +2170,6 @@ PKG_VERSION_FROM_DIR= BRANCH_EXT_FROM= .endif .endif -.endif PKGMAKEARGS+= PKG_VERSION=${PKG_VERSION} \ NO_INSTALLEXTRAKERNELS=${NO_INSTALLEXTRAKERNELS} @@ -2264,17 +2260,13 @@ create-packages-source: _pkgbootstrap _repodir .PHONY create-packages: .PHONY create-packages-world create-packages-kernel create-packages-source -create-source-packages: _pkgbootstrap .PHONY - rm -f ${SSTAGEDIR}/*.plist 2>/dev/null || : +create-source-src-package: _pkgbootstrap .PHONY + rm -f ${SSTAGEDIR}/src.plist 2>/dev/null || : .if !empty(GIT_CMD) && exists(${GIT_CMD}) && exists(${SRCDIR}/.git) @cd ${SRCDIR}; \ ( echo "@override_prefix /usr/src" ; \ ${GIT_CMD} ls-files --recurse-submodules ":!:sys/" ) \ > ${SSTAGEDIR}/src.plist - @cd ${SRCDIR}; \ - ( echo "@override_prefix /usr/src" ; \ - ${GIT_CMD} ls-files --recurse-submodules "sys/" ) \ - > ${SSTAGEDIR}/src-sys.plist ${SRCDIR}/release/packages/generate-ucl.lua \ PKGNAME "src" \ PKGGENNAME "src" \ @@ -2286,6 +2278,22 @@ create-source-packages: _pkgbootstrap .PHONY PKG_WWW "${PKG_WWW}" \ ${SRCDIR}/release/packages/template.ucl \ ${SSTAGEDIR}/src.ucl + ${PKG_CMD} -o ABI=${PKG_ABI} \ + -o OSVERSION="${SRCRELDATE}" \ + create -f ${PKG_FORMAT} \ + -M ${SSTAGEDIR}/src.ucl \ + -p ${SSTAGEDIR}/src.plist \ + -r ${SRCDIR} \ + -o ${REPODIR}/${PKG_ABI}/${PKG_OUTPUT_DIR} +.endif + +create-source-src-sys-package: _pkgbootstrap .PHONY + rm -f ${SSTAGEDIR}/src-sys.plist 2>/dev/null || : +.if !empty(GIT_CMD) && exists(${GIT_CMD}) && 
exists(${SRCDIR}/.git) + @cd ${SRCDIR}; \ + ( echo "@override_prefix /usr/src" ; \ + ${GIT_CMD} ls-files --recurse-submodules "sys/" ) \ + > ${SSTAGEDIR}/src-sys.plist ${SRCDIR}/release/packages/generate-ucl.lua \ PKGNAME "src-sys" \ PKGGENNAME "src" \ @@ -2297,13 +2305,6 @@ create-source-packages: _pkgbootstrap .PHONY PKG_WWW "${PKG_WWW}" \ ${SRCDIR}/release/packages/template.ucl \ ${SSTAGEDIR}/src-sys.ucl - ${PKG_CMD} -o ABI=${PKG_ABI} \ - -o OSVERSION="${SRCRELDATE}" \ - create -f ${PKG_FORMAT} \ - -M ${SSTAGEDIR}/src.ucl \ - -p ${SSTAGEDIR}/src.plist \ - -r ${SRCDIR} \ - -o ${REPODIR}/${PKG_ABI}/${PKG_OUTPUT_DIR} ${PKG_CMD} -o ABI=${PKG_ABI} \ -o OSVERSION="${SRCRELDATE}" \ create -f ${PKG_FORMAT} \ @@ -2313,6 +2314,8 @@ create-source-packages: _pkgbootstrap .PHONY -o ${REPODIR}/${PKG_ABI}/${PKG_OUTPUT_DIR} .endif +create-source-packages: .PHONY _pkgbootstrap create-source-src-package create-source-src-sys-package + create-world-packages: _pkgbootstrap .PHONY @rm -f ${WSTAGEDIR}/*.plist 2>/dev/null || : @cd ${WSTAGEDIR} ; \ @@ -2345,7 +2348,7 @@ create-world-package-${pkgname}: .PHONY @if [ "${pkgname}" == "runtime" ]; then \ sed -i '' -e "s/%VCS_REVISION%/${VCS_REVISION}/" ${WSTAGEDIR}/${pkgname}.ucl ; \ fi - ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname -o ALLOW_BASE_SHLIBS=yes \ + ${PKG_CMD} -o ABI=${PKG_ABI} -o ALLOW_BASE_SHLIBS=yes \ -o OSVERSION="${SRCRELDATE}" \ create -f ${PKG_FORMAT} -M ${WSTAGEDIR}/${pkgname}.ucl \ -p ${WSTAGEDIR}/${pkgname}.plist \ @@ -2447,7 +2450,7 @@ create-kernel-packages-extra-flavor${flavor:C,^""$,${_default_flavor},}-${_kerne /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ - ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname -o ALLOW_BASE_SHLIBS=yes \ + ${PKG_CMD} -o ABI=${PKG_ABI} -o ALLOW_BASE_SHLIBS=yes \ -o OSVERSION="${SRCRELDATE}" \ create -f ${PKG_FORMAT} \ -M 
${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl \ @@ -2459,20 +2462,22 @@ create-kernel-packages-extra-flavor${flavor:C,^""$,${_default_flavor},}-${_kerne . endfor .endif -sign-packages: _pkgbootstrap .PHONY +sign-packages: .PHONY + ${_+_}@cd ${.CURDIR}; \ + ${MAKE} -f Makefile.inc1 PKG_VERSION=${PKG_VERSION} real-sign-packages + +real-sign-packages: _pkgbootstrap .PHONY printf "version = 2;\n" > ${WSTAGEDIR}/meta .if ${PKG_BIN_VERSION} < 11700 printf "packing_format = \"${PKG_FORMAT}\";\n" >> ${WSTAGEDIR}/meta .endif - @[ -L "${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/latest" ] && \ - unlink ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/latest ; \ - ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname repo \ - -o OSVERSION="${SRCRELDATE}" \ + @[ -L "${REPODIR}/${PKG_ABI}/latest" ] && unlink ${REPODIR}/${PKG_ABI}/latest; \ + ${PKG_CMD} -o ABI=${PKG_ABI} repo -o OSVERSION="${SRCRELDATE}" \ -m ${WSTAGEDIR}/meta \ - -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} \ - ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} \ + -o ${REPODIR}/${PKG_ABI}/${PKG_VERSION} \ + ${REPODIR}/${PKG_ABI}/${PKG_VERSION} \ ${PKG_REPO_SIGNING_KEY} ; \ - cd ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI); \ + cd ${REPODIR}/${PKG_ABI}; \ ln -s ${PKG_OUTPUT_DIR} latest # diff --git a/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h b/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h index 8341c6a27892..5d65069637c4 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h @@ -143,7 +143,6 @@ extern int __isthreaded; #undef je_malloc_stats_print #undef je_allocm #undef je_rallocm -#undef je_sallocm #undef je_dallocm #undef je_nallocm #define je_malloc __malloc @@ -165,11 +164,6 @@ extern int __isthreaded; #define je_mallctlnametomib 
__mallctlnametomib #define je_mallctlbymib __mallctlbymib #define je_malloc_stats_print __malloc_stats_print -#define je_allocm __allocm -#define je_rallocm __rallocm -#define je_sallocm __sallocm -#define je_dallocm __dallocm -#define je_nallocm __nallocm #define open _open #define read _read #define write _write @@ -216,10 +210,20 @@ __weak_reference(__mallctlnametomib, mallctlnametomib); __weak_reference(__mallctlbymib, mallctlbymib); __weak_reference(__malloc_stats_print, malloc_stats_print); #ifndef __CHERI_PURE_CAPABILITY__ -__weak_reference(__allocm, allocm); -__weak_reference(__rallocm, rallocm); -__weak_reference(__sallocm, sallocm); -__weak_reference(__dallocm, dallocm); -__weak_reference(__nallocm, nallocm); +__weak_reference(je_allocm, weak_allocm); +__weak_reference(je_rallocm, weak_rallocm); +__weak_reference(je_sallocm, weak_sallocm); +__weak_reference(je_dallocm, weak_dallocm); +__weak_reference(je_nallocm, weak_nallocm); +__sym_compat(__allocm, je_allocm, FBSD_1.3); +__sym_compat(__rallocm, je_rallocm, FBSD_1.3); +__sym_compat(__sallocm, je_sallocm, FBSD_1.3); +__sym_compat(__dallocm, je_dallocm, FBSD_1.3); +__sym_compat(__nallocm, je_nallocm, FBSD_1.3); +__sym_compat(allocm, weak_allocm, FBSD_1.3); +__sym_compat(rallocm, weak_rallocm, FBSD_1.3); +__sym_compat(sallocm, weak_sallocm, FBSD_1.3); +__sym_compat(dallocm, weak_dallocm, FBSD_1.3); +__sym_compat(nallocm, weak_nallocm, FBSD_1.3); #endif #endif diff --git a/contrib/libcxxrt/exception.cc b/contrib/libcxxrt/exception.cc index 35ff997dd445..b56333e979a2 100644 --- a/contrib/libcxxrt/exception.cc +++ b/contrib/libcxxrt/exception.cc @@ -1433,6 +1433,19 @@ extern "C" void __cxa_call_unexpected(void*exception) abort(); } +/** + * ABI function, called when an object destructor exits due to an + * exception during stack unwinding. + * + * This function does not return. + */ +extern "C" void __cxa_call_terminate(void *exception) throw() +{ + std::terminate(); + // Should not be reached. 
+ abort(); +} + /** * ABI function, returns the adjusted pointer to the exception object. */ diff --git a/lib/libc/aarch64/string/memcpy.S b/lib/libc/aarch64/string/memcpy.S index ac4fbe8d6175..f403dd2e42a8 100644 --- a/lib/libc/aarch64/string/memcpy.S +++ b/lib/libc/aarch64/string/memcpy.S @@ -1,6 +1,3 @@ #define __memcpy_aarch64 memcpy #define __memmove_aarch64 memmove #include "aarch64/memcpy.S" - -.weak index -.equ index, strchr diff --git a/lib/libcxxrt/Version.map b/lib/libcxxrt/Version.map index e5270f149bcc..34f7a5f58483 100644 --- a/lib/libcxxrt/Version.map +++ b/lib/libcxxrt/Version.map @@ -254,6 +254,11 @@ CXXABI_1.3.11 { # __cxa_init_primary_exception; } CXXABI_1.3.9; +CXXABI_1.3.15 { +# Not present in CHERI libcxxrt +# __cxa_call_terminate; +} CXXABI_1.3.11; + CXXRT_1.0 { extern "C++" { diff --git a/lib/libcxxrt/Version.map.arm b/lib/libcxxrt/Version.map.arm index 9c98c05f3446..8bc0afacda31 100644 --- a/lib/libcxxrt/Version.map.arm +++ b/lib/libcxxrt/Version.map.arm @@ -255,6 +255,11 @@ CXXABI_1.3.11 { # __cxa_init_primary_exception; } CXXABI_1.3.9; +CXXABI_1.3.15 { +# Not present in CHERI libcxxrt +# __cxa_call_terminate; +} CXXABI_1.3.11; + CXXRT_1.0 { extern "C++" { diff --git a/lib/libfetch/fetch.c b/lib/libfetch/fetch.c index 12cbd0fb746f..97fc04bb09a6 100644 --- a/lib/libfetch/fetch.c +++ b/lib/libfetch/fetch.c @@ -447,7 +447,10 @@ fetchParseURL(const char *URL) goto ouch; } u->doc = doc; - while (*p != '\0') { + /* fragments are reserved for client-side processing, see + * https://www.rfc-editor.org/rfc/rfc9110.html#section-7.1 + */ + while (*p != '\0' && *p != '#') { if (!isspace((unsigned char)*p)) { *doc++ = *p++; } else { diff --git a/lib/libgeom/geom_stats.c b/lib/libgeom/geom_stats.c index 7ae5c947b7b1..510636eb9a8b 100644 --- a/lib/libgeom/geom_stats.c +++ b/lib/libgeom/geom_stats.c @@ -54,9 +54,12 @@ geom_stats_close(void) { if (statsfd == -1) return; - munmap(statp, npages * pagesize); - statp = NULL; - close (statsfd); + if (statp != 
NULL) { + if (munmap(statp, npages * pagesize) != 0) + err(1, "munmap"); + statp = NULL; + } + close(statsfd); statsfd = -1; } @@ -73,22 +76,18 @@ geom_stats_resync(void) if (error) err(1, "DIOCGMEDIASIZE(" _PATH_DEV DEVSTAT_DEVICE_NAME ")"); - munmap(statp, npages * pagesize); - p = mmap(statp, mediasize, PROT_READ, MAP_SHARED, statsfd, 0); + if (statp != NULL && munmap(statp, npages * pagesize) != 0) + err(1, "munmap"); + p = mmap(NULL, mediasize, PROT_READ, MAP_SHARED, statsfd, 0); if (p == MAP_FAILED) - err(1, "mmap(/dev/devstat):"); - else { - statp = p; - npages = mediasize / pagesize; - } + err(1, "mmap(/dev/devstat)"); + statp = p; + npages = mediasize / pagesize; } int geom_stats_open(void) { - int error; - void *p; - if (statsfd != -1) return (EBUSY); statsfd = open(_PATH_DEV DEVSTAT_DEVICE_NAME, O_RDONLY); @@ -96,15 +95,6 @@ geom_stats_open(void) return (errno); pagesize = getpagesize(); spp = pagesize / sizeof(struct devstat); - p = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, statsfd, 0); - if (p == MAP_FAILED) { - error = errno; - close(statsfd); - statsfd = -1; - errno = error; - return (error); - } - statp = p; npages = 1; geom_stats_resync(); return (0); diff --git a/lib/libpam/modules/pam_xdg/pam_xdg.8 b/lib/libpam/modules/pam_xdg/pam_xdg.8 index 1a8b53def051..9b97d3626531 100644 --- a/lib/libpam/modules/pam_xdg/pam_xdg.8 +++ b/lib/libpam/modules/pam_xdg/pam_xdg.8 @@ -22,7 +22,7 @@ .\" * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" * SUCH DAMAGE. -.Dd February 21, 2024 +.Dd August 22, 2024 .Dt PAM_XDG 8 .Os .Sh NAME @@ -42,8 +42,8 @@ By default the directory is created under .Pa /var/run/xdg/ . 
.Pp The following option may be passed to the authentication module: -.Bl -tag -width ".Cm runtime_dir" -.It Cm runtime_dir Ns = Ns Ar directory +.Bl -tag -width ".Cm runtime_dir_prefix" +.It Cm runtime_dir_prefix Ns = Ns Ar directory Use an alternate base directory .El .Sh SEE ALSO diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h index f3f7985ab7b6..1595f902846c 100644 --- a/lib/msun/src/math_private.h +++ b/lib/msun/src/math_private.h @@ -405,7 +405,7 @@ do { \ * any extra precision into the type of 'a' -- 'a' should have type float_t, * double_t or long double. b's type should be no larger than 'a's type. * Callers should use these types with scopes as large as possible, to - * reduce their own extra-precision and efficiciency problems. In + * reduce their own extra-precision and efficiency problems. In * particular, they shouldn't convert back and forth just to call here. */ #ifdef DEBUG diff --git a/libexec/nuageinit/nuage.lua b/libexec/nuageinit/nuage.lua index cca1fe9b4678..116ab143ccfa 100644 --- a/libexec/nuageinit/nuage.lua +++ b/libexec/nuageinit/nuage.lua @@ -188,10 +188,7 @@ local function addsshkey(homedir, key) chownak = true dirattrs = lfs.attributes(dotssh_path) if dirattrs == nil then - if not lfs.mkdir(dotssh_path) then - warnmsg("nuageinit: impossible to create ".. dotssh_path) - return - end + assert(lfs.mkdir(dotssh_path)) chowndotssh = true dirattrs = lfs.attributes(homedir) end diff --git a/libexec/nuageinit/nuageinit b/libexec/nuageinit/nuageinit index f268f9b0f52c..622e294bb531 100755 --- a/libexec/nuageinit/nuageinit +++ b/libexec/nuageinit/nuageinit @@ -164,6 +164,12 @@ if citype == "config-2" then nuage.err("nuageinit: error parsing config-2: meta_data.json: " .. 
err) end local obj = parser:get_object() + if obj.public_keys then + local homedir = nuage.adduser(default_user) + for _,v in pairs(obj.public_keys) do + nuage.addsshkey(homedir, v) + end + end nuage.sethostname(obj["hostname"]) -- network diff --git a/libexec/nuageinit/tests/nuageinit.sh b/libexec/nuageinit/tests/nuageinit.sh index c6a86bc15486..b5078e256853 100644 --- a/libexec/nuageinit/tests/nuageinit.sh +++ b/libexec/nuageinit/tests/nuageinit.sh @@ -8,6 +8,7 @@ atf_test_case nocloud_network atf_test_case config2 atf_test_case config2_pubkeys atf_test_case config2_pubkeys_user_data +atf_test_case config2_pubkeys_meta_data atf_test_case config2_network atf_test_case config2_network_static_v4 @@ -242,6 +243,52 @@ EOF atf_check -o inline:"ssh-rsa AAAAB3NzaC1y...== Generated by Nova\n" cat home/freebsd/.ssh/authorized_keys } +config2_pubkeys_meta_data_body() +{ + here=$(pwd) + export NUAGE_FAKE_ROOTDIR=$(pwd) + if [ $(id -u) -ne 0 ]; then + atf_skip "root required" + fi + mkdir -p media/nuageinit + cat > media/nuageinit/meta_data.json < etc/master.passwd < etc/group <pltrela; rela < relalim; rela++) { Elf_Addr *where; - assert(ELF_R_TYPE(rela->r_info) == R_RISCV_JUMP_SLOT); - where = (Elf_Addr *)(obj->relocbase + rela->r_offset); - *where += (Elf_Addr)obj->relocbase; + + switch (ELF_R_TYPE(rela->r_info)) { + case R_RISCV_JUMP_SLOT: + *where += (Elf_Addr)obj->relocbase; + break; + case R_RISCV_IRELATIVE: + obj->irelative = true; + break; + default: + _rtld_error("Unknown relocation type %u in PLT", + (unsigned int)ELF_R_TYPE(rela->r_info)); + return (-1); + } } return (0); @@ -241,6 +251,11 @@ reloc_jmpslots(Obj_Entry *obj, int flags, RtldLockState *lockstate) return (-1); } + if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) { + obj->gnu_ifunc = true; + continue; + } + *where = (Elf_Addr)(defobj->relocbase + def->st_value); break; default: @@ -253,30 +268,89 @@ reloc_jmpslots(Obj_Entry *obj, int flags, RtldLockState *lockstate) return (0); } +static void 
+reloc_iresolve_one(Obj_Entry *obj, const Elf_Rela *rela, + RtldLockState *lockstate) +{ + Elf_Addr *where, target, *ptr; + + ptr = (Elf_Addr *)(obj->relocbase + rela->r_addend); + where = (Elf_Addr *)(obj->relocbase + rela->r_offset); + lock_release(rtld_bind_lock, lockstate); + target = call_ifunc_resolver(ptr); + wlock_acquire(rtld_bind_lock, lockstate); + *where = target; +} + int -reloc_iresolve(Obj_Entry *obj __unused, - struct Struct_RtldLockState *lockstate __unused) +reloc_iresolve(Obj_Entry *obj, struct Struct_RtldLockState *lockstate) { + const Elf_Rela *relalim; + const Elf_Rela *rela; + + if (!obj->irelative) + return (0); - /* XXX not implemented */ + obj->irelative = false; + relalim = (const Elf_Rela *)((const char *)obj->pltrela + + obj->pltrelasize); + for (rela = obj->pltrela; rela < relalim; rela++) { + if (ELF_R_TYPE(rela->r_info) == R_RISCV_IRELATIVE) + reloc_iresolve_one(obj, rela, lockstate); + } return (0); } int -reloc_iresolve_nonplt(Obj_Entry *obj __unused, - struct Struct_RtldLockState *lockstate __unused) +reloc_iresolve_nonplt(Obj_Entry *obj, struct Struct_RtldLockState *lockstate) { + const Elf_Rela *relalim; + const Elf_Rela *rela; - /* XXX not implemented */ + if (!obj->irelative_nonplt) + return (0); + + obj->irelative_nonplt = false; + relalim = (const Elf_Rela *)((const char *)obj->rela + obj->relasize); + for (rela = obj->rela; rela < relalim; rela++) { + if (ELF_R_TYPE(rela->r_info) == R_RISCV_IRELATIVE) + reloc_iresolve_one(obj, rela, lockstate); + } return (0); } int -reloc_gnu_ifunc(Obj_Entry *obj __unused, int flags __unused, - struct Struct_RtldLockState *lockstate __unused) +reloc_gnu_ifunc(Obj_Entry *obj, int flags, + struct Struct_RtldLockState *lockstate) { + const Elf_Rela *relalim; + const Elf_Rela *rela; + uintptr_t *where, target; + const Elf_Sym *def; + const Obj_Entry *defobj; + + if (!obj->gnu_ifunc) + return (0); - /* XXX not implemented */ + relalim = (const Elf_Rela *)((const char *)obj->pltrela + 
obj->pltrelasize); + for (rela = obj->pltrela; rela < relalim; rela++) { + if (ELF_R_TYPE(rela->r_info) == R_RISCV_JUMP_SLOT) { + where = (uintptr_t *)(obj->relocbase + rela->r_offset); + def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj, + SYMLOOK_IN_PLT | flags, NULL, lockstate); + if (def == NULL) + return (-1); + if (ELF_ST_TYPE(def->st_info) != STT_GNU_IFUNC) + continue; + + lock_release(rtld_bind_lock, lockstate); + target = (Elf_Addr)rtld_resolve_ifunc(defobj, def); + wlock_acquire(rtld_bind_lock, lockstate); + reloc_jmpslot(where, target, defobj, obj, + (const Elf_Rel *)rela); + } + } + obj->gnu_ifunc = false; return (0); } @@ -286,7 +360,8 @@ reloc_jmpslot(uintptr_t *where, uintptr_t target, const Elf_Rel *rel) { - assert(ELF_R_TYPE(rel->r_info) == R_RISCV_JUMP_SLOT); + assert(ELF_R_TYPE(rel->r_info) == R_RISCV_JUMP_SLOT || + ELF_R_TYPE(rel->r_info) == R_RISCV_IRELATIVE); if (*where != target && !ld_bind_not) *where = target; @@ -305,7 +380,7 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags, const Elf_Rela *rela; const Elf_Sym *def; SymCache *cache; - Elf_Addr *where; + Elf_Addr *where, symval; unsigned long symnum; #ifdef __CHERI_PURE_CAPABILITY__ @@ -319,10 +394,6 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags, } #endif - if ((flags & SYMLOOK_IFUNC) != 0) - /* XXX not implemented */ - return (0); - /* * The dynamic loader may be called from a thread, we have * limited amounts of stack available so we cannot use alloca(). @@ -350,8 +421,27 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags, if (def == NULL) return (-1); - *where = (Elf_Addr)(defobj->relocbase + def->st_value + - rela->r_addend); + /* + * If symbol is IFUNC, only perform relocation + * when caller allowed it by passing + * SYMLOOK_IFUNC flag. Skip the relocations + * otherwise. 
+ */ + if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) { + if ((flags & SYMLOOK_IFUNC) == 0) { + obj->non_plt_gnu_ifunc = true; + continue; + } + symval = (Elf_Addr)rtld_resolve_ifunc(defobj, + def); + } else { + if ((flags & SYMLOOK_IFUNC) != 0) + continue; + symval = (Elf_Addr)(defobj->relocbase + + def->st_value); + } + + *where = symval + rela->r_addend; break; case R_RISCV_TLS_DTPMOD64: def = find_symdef(symnum, obj, &defobj, flags, cache, @@ -430,6 +520,9 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags, case R_RISCV_RELATIVE: *where = (Elf_Addr)(obj->relocbase + rela->r_addend); break; + case R_RISCV_IRELATIVE: + obj->irelative_nonplt = true; + break; #ifdef __CHERI_PURE_CAPABILITY__ case R_RISCV_CHERI_CAPABILITY: if (process_r_cheri_capability(obj, symnum, lockstate, @@ -447,10 +540,13 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags, return (0); } +unsigned long elf_hwcap; + void -ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused) +ifunc_init(Elf_Auxinfo *aux_info[__min_size(AT_COUNT)]) { - + if (aux_info[AT_HWCAP] != NULL) + elf_hwcap = aux_info[AT_HWCAP]->a_un.a_val; } void diff --git a/libexec/rtld-elf/riscv/rtld_machdep.h b/libexec/rtld-elf/riscv/rtld_machdep.h index 408822de369e..f464c70e2b06 100644 --- a/libexec/rtld-elf/riscv/rtld_machdep.h +++ b/libexec/rtld-elf/riscv/rtld_machdep.h @@ -111,8 +111,11 @@ uintptr_t reloc_jmpslot(uintptr_t *where, uintptr_t target, #endif /* __CHERI_PURE_CAPABILITY__ */ +extern unsigned long elf_hwcap; #define call_ifunc_resolver(ptr) \ - (((uintptr_t (*)(void))ptr)()) + (((uintptr_t (*)(unsigned long, unsigned long, unsigned long, \ + unsigned long, unsigned long, unsigned long, unsigned long, \ + unsigned long))ptr)(elf_hwcap, 0, 0, 0, 0, 0, 0, 0)) /* * TLS diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 9422311ff617..918dcb785113 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -1059,7 +1059,7 @@ _rtld(Elf_Addr *sp, func_ptr_type 
*exit_proc, Obj_Entry **objp) exit (0); } - ifunc_init(aux); + ifunc_init(aux_info); /* * Setup TLS for main thread. This must be done after the diff --git a/libexec/rtld-elf/rtld.h b/libexec/rtld-elf/rtld.h index 10cf1c700837..0b49324ee898 100644 --- a/libexec/rtld-elf/rtld.h +++ b/libexec/rtld-elf/rtld.h @@ -587,7 +587,7 @@ int reloc_jmpslots(Obj_Entry *, int flags, struct Struct_RtldLockState *); int reloc_iresolve(Obj_Entry *, struct Struct_RtldLockState *); int reloc_iresolve_nonplt(Obj_Entry *, struct Struct_RtldLockState *); int reloc_gnu_ifunc(Obj_Entry *, int flags, struct Struct_RtldLockState *); -void ifunc_init(Elf_Auxinfo[__min_size(AT_COUNT)]); +void ifunc_init(Elf_Auxinfo *[__min_size(AT_COUNT)]); void init_pltgot(Obj_Entry *); void allocate_initial_tls(Obj_Entry *); diff --git a/release/Makefile b/release/Makefile index 0673c40760a2..6ac0946c19fb 100644 --- a/release/Makefile +++ b/release/Makefile @@ -123,7 +123,7 @@ base.txz: sh ${WORLDDIR}/usr.sbin/etcupdate/etcupdate.sh extract -B \ -m "${MAKE}" -M "TARGET_ARCH=${TARGET_ARCH} TARGET=${TARGET}" \ -s ${WORLDDIR} -d "${.OBJDIR}/${DISTDIR}/base/var/db/etcupdate" \ - ${NO_ROOT:D-N} + -L /dev/null ${NO_ROOT:D-N} .if defined(NO_ROOT) echo "./var/db/etcupdate type=dir uname=root gname=wheel mode=0755" >> ${.OBJDIR}/${DISTDIR}/base.meta sed -n 's,^\.,./var/db/etcupdate/current,p' ${.OBJDIR}/${DISTDIR}/base/var/db/etcupdate/current/METALOG \ diff --git a/release/Makefile.ec2 b/release/Makefile.ec2 index 8f5f6f205779..27e6340dca2b 100644 --- a/release/Makefile.ec2 +++ b/release/Makefile.ec2 @@ -29,6 +29,11 @@ BOOTMODEOPT= --uefi .if ${AMIBOOTMETHOD} == "UEFI-PREFERRED" && ${TARGET_ARCH} == "amd64" BOOTMODEOPT= --uefi-preferred .endif +.if ${AMIBOOTMETHOD} == "UEFI-PREFERRED" +AMIBASENAME=${TYPE} ${REVISION}-${BRANCH}-${TARGET}${AMINAMESUFFIX} +.else +AMIBASENAME=${TYPE} ${REVISION}-${BRANCH}-${TARGET}${AMINAMESUFFIX} ${AMIBOOTMETHOD} +.endif CLEANFILES+= ec2ami @@ -60,6 +65,7 @@ cw-ec2-portinstall: 
SSMOPTS_${_FL}_${_FS}= --ssm-name ${SSMPREFIX}/${TARGET_ARCH:S/aarch64/arm64/}/${_FL}/${_FS}/${REVISION}/${BRANCH} .endif EC2AMILIST+= ec2ami-${_FL}-${_FS} +CLEANFILES+= ec2ami-${_FL}-${_FS} ec2ami-${_FL}-${_FS}: cw-ec2-${_FL}-${_FS} ${CW_EC2_PORTINSTALL} .if !defined(AWSKEYFILE) || !exists(${AWSKEYFILE}) @echo "--------------------------------------------------------------" @@ -82,7 +88,7 @@ ec2ami-${_FL}-${_FS}: cw-ec2-${_FL}-${_FS} ${CW_EC2_PORTINSTALL} /usr/local/bin/bsdec2-image-upload ${PUBLISH} ${PUBLICSNAP} \ ${EC2ARCH} ${SSMOPTS_${_FL}_${_FS}} ${BOOTMODEOPT} --sriov --ena \ ${.OBJDIR}/${EC2-${_FL:tu}${_FS:tu}IMAGE} \ - "${TYPE} ${REVISION}-${BRANCH}-${TARGET}${AMINAMESUFFIX} ${AMIBOOTMETHOD} ${_FL} ${_FS:tu}" \ + "${AMIBASENAME} ${_FL} ${_FS:tu}" \ "${TYPE}/${TARGET} ${GITBRANCH}@${GITREV}" \ ${AWSREGION} ${AWSBUCKET} ${AWSKEYFILE} \ ${EC2SNSTOPIC} ${EC2SNSREL} ${EC2SNSVERS} diff --git a/release/tools/ec2.conf b/release/tools/ec2.conf index 989e8a05a318..602216d3c2d4 100644 --- a/release/tools/ec2.conf +++ b/release/tools/ec2.conf @@ -10,12 +10,12 @@ export VM_EXTRA_PACKAGES="${VM_EXTRA_PACKAGES} ebsnvme-id amazon-ssm-agent" # Services which should be enabled by default in rc.conf(5). export VM_RC_LIST="dev_aws_disk ntpd" -# Build with a 5.9 GB partition; the growfs rc.d script will expand +# Build with a 7.9 GB partition; the growfs rc.d script will expand # the partition to fill the root disk after the EC2 instance is launched. # Note that if this is set to <N>G, we will end up with an <N+1> GB disk # image since VMSIZE is the size of the filesystem partition, not the disk # which it resides within.
-export VMSIZE=6000m +export VMSIZE=8000m # No swap space; it doesn't make sense to provision any as part of the disk # image when we could be launching onto a system with anywhere between 0.5 diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c index b60e64fba338..45bfdf31f8dc 100644 --- a/sbin/pfctl/pfctl.c +++ b/sbin/pfctl/pfctl.c @@ -394,8 +394,6 @@ pfctl_check_skip_ifaces(char *ifname) continue; for (n = h; n != NULL; n = n->next) { - if (p->pfik_ifp == NULL) - continue; if (strncmp(p->pfik_name, ifname, IFNAMSIZ)) continue; @@ -422,9 +420,6 @@ pfctl_adjust_skip_ifaces(struct pfctl *pf) for (n = h; n != NULL; n = n->next) PFRB_FOREACH(pp, &skip_b) { - if (pp->pfik_ifp == NULL) - continue; - if (strncmp(pp->pfik_name, n->ifname, IFNAMSIZ)) continue; @@ -437,7 +432,7 @@ pfctl_adjust_skip_ifaces(struct pfctl *pf) } PFRB_FOREACH(p, &skip_b) { - if (p->pfik_ifp == NULL || ! (p->pfik_flags & PFI_IFLAG_SKIP)) + if (! (p->pfik_flags & PFI_IFLAG_SKIP)) continue; pfctl_set_interface_flags(pf, p->pfik_name, PFI_IFLAG_SKIP, 0); diff --git a/share/man/man4/linux.4 b/share/man/man4/linux.4 index 212dd2526f3f..711ac11e8fce 100644 --- a/share/man/man4/linux.4 +++ b/share/man/man4/linux.4 @@ -61,11 +61,13 @@ under .Pa /compat/linux ) before .Pa / . -For example, when Linux process attempts to open +For example, when a Linux process attempts to open .Pa /etc/passwd , -it will really access +it will first access .Pa /compat/linux/etc/passwd , -unless the latter does not exist. +falling back to +.Pa /etc/passwd +if the compat path does not exist. 
This is used to make sure Linux processes load Linux shared libraries instead of their similarly-named FreeBSD counterparts, and also to provide alternative versions of certain other files and virtual diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5 index da55f00293bb..f04b0799741e 100644 --- a/share/man/man5/pf.conf.5 +++ b/share/man/man5/pf.conf.5 @@ -27,7 +27,7 @@ .\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd June 6, 2024 +.Dd June 24, 2024 .Dt PF.CONF 5 .Os .Sh NAME @@ -1400,9 +1400,14 @@ or .Xr udp 4 connections; implicitly in the case of .Ar nat -rules and explicitly in the case of +rules and both implicitly and explicitly in the case of .Ar rdr rules. +A +.Ar rdr +rule may cause the source port to be modified if doing so avoids a conflict +with an existing connection. +A random source port in the range 50001-65535 is chosen in this case. Port numbers are never translated with a .Ar binat rule. diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index 5672e1c2987a..c7375606b757 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,5 +1,5 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. -.Dd November 14, 2024 +.Dd November 20, 2024 .Dt SRC.CONF 5 .Os .Sh NAME @@ -1788,7 +1788,7 @@ and related programs. .It Va WITH_UNDEFINED_VERSION Link libraries with --undefined-version which permits version maps to contain symbols that are not present in the library. -If this is necessicary to build a particular configuration, a bug is +If this is necessary to build a particular configuration, a bug is present and the configuration should be reported. 
.It Va WITHOUT_UNIFIED_OBJDIR Use the historical object directory format for diff --git a/share/man/man7/build.7 b/share/man/man7/build.7 index 3c71c14e6039..afafef9d3c02 100644 --- a/share/man/man7/build.7 +++ b/share/man/man7/build.7 @@ -401,6 +401,19 @@ Install the kernel to the directory .Pa ${DISTDIR}/kernel/boot/kernel . This target is used while building a release; see .Xr release 7 . +.It Cm packages +Create a +.Xr pkg 7 +repository containing packages that can be used to create or upgrade an +installation of the base system. +The output repository is placed in the object directory, under +.Pa repo/${PKG_ABI} +where +.Va PKG_ABI +is the +.Xr pkg 7 +ABI for the build target, for example, +.Pa /usr/obj/${SRCDIR}/repo/FreeBSD:15:amd64 . .It Cm packagekernel Archive the results of .Cm distributekernel , diff --git a/share/misc/bsd-family-tree b/share/misc/bsd-family-tree index 955bb0f30e43..0849e883e2c3 100644 --- a/share/misc/bsd-family-tree +++ b/share/misc/bsd-family-tree @@ -413,54 +413,56 @@ FreeBSD 5.2 | | | | | | | | | | | | DragonFly 5.6.3 | | | | | | NetBSD | | | | | | | | 8.2 | | - | | | | | | | DragonFly 5.8.1 - | | | | | | OpenBSD 6.7 | - | | FreeBSD | | | | | - | | 11.4 | | | | | - | | | | | | DragonFly 5.8.2 - | | | | | | DragonFly 5.8.3 - | | | | NetBSD 9.1 OpenBSD 6.8 | - | FreeBSD macOS | | | | - | 12.2 11 | | | | - | | | | | | | - | `------. | | | | | - | | | | | | | - *--FreeBSD | | | | | | - | 13.0 | | | NetBSD 9.2 OpenBSD 6.9 DragonFly 6.0.0 - | | | | | | | | - | | | | | | | DragonFly 6.0.1 - | | | | | | | | - | | FreeBSD macOS | | OpenBSD 7.0 | - | | 12.3 12 | | | | - | | | | | | | DragonFly 6.2.1 - | | | | | | OpenBSD 7.1 | - | FreeBSD | | | | | | - | 13.1 | | | | | | - | | | | | | | DragonFly 6.2.2 - | | | | | NetBSD 9.3 | | - | | | macOS | OpenBSD 7.2 | - | | | 13 | | | - | | FreeBSD | | | | - | | 12.4 | | | | - | | | | | DragonFly 6.4.0 - | | | | OpenBSD 7.3 | - | FreeBSD | | | | - | 13.2 | | | | - | | | | | | - | `------. 
| | | | - | | macOS | | | - | | 14 | | | - | | | | OpenBSD 7.4 | - *--FreeBSD | | | | | - | 14.0 | | | | | - | | | | | | | - | | FreeBSD | | | | - | | 13.3 | | | | - | | | *--NetBSD | | - | | | | 10.0 | | - | | | | | | - | | | | OpenBSD 7.5 | - | FreeBSD | | | | + | | | | | | | | DragonFly 5.8.1 + | | | | | | | OpenBSD 6.7 | + | | FreeBSD | | | | | | + | | 11.4 | | | | | | + | | | | | | | DragonFly 5.8.2 + | | | | | | | DragonFly 5.8.3 + | | | | NetBSD | OpenBSD 6.8 | + | FreeBSD macOS | 9.1 | | | + | 12.2 11 | | | | | + | | | | | | | | + | `------. | | | | | | + | | | | | | | | + *--FreeBSD | | | | | | | + | 13.0 | | | NetBSD | OpenBSD 6.9 DragonFly 6.0.0 + | | | | | 9.2 | | | + | | | | | | | | DragonFly 6.0.1 + | | | | | | | | | + | | FreeBSD macOS | | | OpenBSD 7.0 | + | | 12.3 12 | | | | | + | | | | | | | | DragonFly 6.2.1 + | | | | | | | OpenBSD 7.1 | + | FreeBSD | | | | | | | + | 13.1 | | | | | | | + | | | | | | | | DragonFly 6.2.2 + | | | | | NetBSD | | | + | | | macOS | 9.3 | OpenBSD 7.2 | + | | | 13 | | | | | + | | FreeBSD | | | | | | + | | 12.4 | | | | | | + | | | | | | | DragonFly 6.4.0 + | | | | | | OpenBSD 7.3 | + | FreeBSD | | | | | | + | 13.2 | | | | | | + | | | | | | | | + | `------. | | | | | | + | | macOS | | | | | + | | 14 | | | | | + | | | | | | OpenBSD 7.4 | + *--FreeBSD | | | | | | | + | 14.0 | | | | | | | + | | | | | | | | | + | | FreeBSD | | NetBSD | | | + | | 13.3 | | 9.4 | | | + | | | | | | | + | | | *--NetBSD | | | + | | | | 10.0 | | | + | | | | | | | + | | | | | OpenBSD 7.5 | + | | | | NetBSD | | + | FreeBSD | | 8.3 | | | 14.1 | | | | | | | | | FreeBSD 15 -current | NetBSD -current OpenBSD -current DragonFly -current @@ -492,7 +494,7 @@ the announcement in Usenet or if it was available as tape. [QCU] Salus, Peter H. A quarter century of UNIX. ISBN 0201547775, EAN 9780201547771 [SMS] Steven M. Schultz. 2.11BSD, UNIX for the PDP-11. -[TUHS] The Unix Historical Society. https://minnie.tuhs.org/Unix_History/. 
+[TUHS] The Unix Historical Society. https://minnie.tuhs.org/Unix_History/ [USE] Usenet announcement. [WRS] Wind River Systems, Inc. [dmr] Dennis Ritchie, via E-Mail @@ -868,7 +870,7 @@ DragonFly 5.8.3 2020-09-24 [DFB] OpenBSD 6.8 2020-10-18 [OBD] NetBSD 9.1 2020-10-18 [NBD] FreeBSD 12.2 2020-10-27 [FBD] -macOS 11 2020-11-19 [APL] +macOS 11 2020-11-12 [APL] FreeBSD 13.0 2021-04-13 [FBD] OpenBSD 6.9 2021-05-01 [OBD] DragonFly 6.0 2021-05-08 [DFB] @@ -894,6 +896,8 @@ FreeBSD 14.0 2023-11-20 [FBD] FreeBSD 13.3 2024-03-05 [FBD] NetBSD 10.0 2024-03-28 [NBD] OpenBSD 7.5 2024-04-05 [OBD] +NetBSD 9.4 2024-04-20 [NBD] +NetBSD 8.3 2024-05-04 [NBD] FreeBSD 14.1 2024-06-04 [FBD] Bibliography @@ -934,10 +938,13 @@ FreeBSD Release Information URL: https://www.FreeBSD.org/releases/ Manual pages for FreeBSD and ports -URL: https://man.FreeBSD.org/cgi/man.cgi +URL: https://man.FreeBSD.org + +FreeBSD Documentation Archive +URL: https://docs-archive.freebsd.org/doc/ UNIX history graphing project -URL: https://minnie.tuhs.org/Unix_History/index.html +URL: https://minnie.tuhs.org/Unix_History/ UNIX history URL: https://www.levenez.com/unix/ @@ -956,5 +963,5 @@ original BSD announcements from Usenet or tapes. Steven M. Schultz for providing 2.8BSD, 2.10BSD, 2.11BSD manual pages. -- -Copyright (c) 1997-2023 Wolfram Schneider +Copyright (c) 1997-2024 Wolfram Schneider URL: https://cgit.freebsd.org/src/tree/share/misc/bsd-family-tree diff --git a/share/misc/pci_vendors b/share/misc/pci_vendors index df152a4e97e0..f168678909c8 100644 --- a/share/misc/pci_vendors +++ b/share/misc/pci_vendors @@ -1,8 +1,8 @@ # # List of PCI ID's # -# Version: 2024.05.14 -# Date: 2024-05-14 03:15:02 +# Version: 2024.06.23 +# Date: 2024-06-23 03:15:02 # # Maintained by Albert Pool, Martin Mares, and other volunteers from # the PCI ID Project at https://pci-ids.ucw.cz/. 
@@ -104,6 +104,17 @@ 025e d81d NVMe DC SSD E1.L 9.5mm [D5-P5336] 0b70 NVMe DC SSD [Yorktown controller] 2b59 NVMe DC SSD [Atomos Prime] + 025e 0008 NVMe DC SSD U.2-SFF 15mm [D7-PS1010] + 025e 0019 NVMe DC SSD E3.S-1T 7.5mm [D7-PS1010] + 025e 0108 NVMe DC SSD U.2-SFF 15mm [D7-PS1030] + 025e 0119 NVMe DC SSD E3.S-1T 7.5mm [D7-PS1030] + 108e 48a0 NVMe DC SSD U.2-SFF 15mm 3.84TB [D7-PS1010 Custom] + 108e 48a1 NVMe DC SSD U.2-SFF 15mm 7.68TB [D7-PS1010 Custom] + 108e 48a2 NVMe DC SSD U.2-SFF 15mm 15.36TB [D7-PS1010 Custom] + 108e 48a3 NVMe DC SSD Add-In-Card [D7-PS1030 Custom] + 108e 48a4 NVMe DC SSD E3.S-1T 7.5mm 3.84TB [D7-PS1010 Custom] + 108e 48a5 NVMe DC SSD E3.S-1T 7.5mm 7.68TB [D7-PS1010 Custom] + 108e 48a6 NVMe DC SSD E3.S-1T 7.5mm 15.36TB [D7-PS1010 Custom] f1ab P41 Plus NVMe SSD (DRAM-less) [Echo Harbor] f1ac P44 Pro NVMe SSD [Hollywood Beach] 0270 Hauppauge computer works Inc. (Wrong ID) @@ -616,6 +627,7 @@ 1bd4 000e 6G SAS2008IR 1bd4 000f 6G SAS2008IT SA5248 1bd4 0010 6G SAS2008IR SA5248 + 4c52 96c8 LRSA96C8 8-Port SATA3(6Gb/s)Exchange Adapter (with Raid) 8086 350f RMS2LL040 RAID Controller 8086 3700 SSD 910 Series 0073 MegaRAID SAS 2008 [Falcon] @@ -1082,6 +1094,12 @@ 10e4 MegaRAID 12GSAS/PCIe Unsupported SAS38xx 10e5 MegaRAID 12GSAS/PCIe SAS38xx 10e6 MegaRAID 12GSAS/PCIe Secure SAS38xx + 1000 04d9 3808N iMR ROMB + 1000 04da 3808N iMR ROMB + 1000 04db 3808N iMR ROMB + 1000 04dc 3808N iMR ROMB + 1000 04dd 3808N iMR ROMB + 1000 40d8 MegaRAID 9524-8i 1000 40e0 MegaRAID 9540-2M2 1028 2172 PERC H355 Adapter 1028 2173 PERC H355 Front @@ -1127,6 +1145,7 @@ 1000 a064 PEX88064 64 lane/port PCIe Gen 4 Switch 1000 a080 PEX88080 80 lane/port PCIe Gen 4 Switch 1000 a096 PEX88096 98 lane/port PCIe Gen 4.0 Switch + 4c52 9f48 LRNV9F48 4-port Built-in 8654 NVMe Switching Adapter c012 PEX880xx PCIe Gen 4 Switch # Virtual endpoint used in Broadcom synthetic PCIe switches for resource reservation 1000 100b PEX88000 PCIe Gen 4 Virtual Upstream/Downstream Port @@ -1249,7 +1268,7 
@@ 103c 8b17 ProBook 445 G9/455 G9 [Ryzen 7 Integrated Radeon GPU] 15ff Fenghuang [Zhongshan Subor Z+] 1607 Arden - 1636 Renoir [Radeon RX Vega 6 (Ryzen 4000/5000 Mobile Series)] + 1636 Renoir [Radeon Vega Series / Radeon Vega Mobile Series] 1637 Renoir Radeon High Definition Audio Controller 1638 Cezanne [Radeon Vega Series / Radeon Vega Mobile Series] 1043 16c2 Radeon Vega 8 @@ -3987,6 +4006,8 @@ 1458 2408 Radeon RX 6750 XT GAMING OC 12G 1462 3980 Radeon RX 6700 XT Mech 2X 12G [MSI] 148c 2409 Red Devil RX 6700 XT +# Dual fan version + 1849 5210 Radeon RX 6700 XT Challenger D 1849 5219 Radeon RX 6700 XT Challenger D 1849 5222 RX 6700 XT Challenger D OC # Gaming 1440/QHD Overclock edition with 12 Gb GDDR6 and PCIe 4.0 of Radeon RX 6700 XT by Sapphire PULSE manufactured on autumn 2022 / C1 reviseion @@ -4022,13 +4043,17 @@ 7448 Navi 31 [Radeon Pro W7900] 744c Navi 31 [Radeon RX 7900 XT/7900 XTX/7900M] 1002 0e3b RX 7900 GRE [XFX] + 1043 0506 TUF Gaming Radeon RX 7900 XTX OC + 1849 5304 Radeon RX 7900 XTX 1da2 471e PULSE RX 7900 XTX + 1da2 475e PULSE RX 7900 GRE 1da2 e471 NITRO+ RX 7900 XTX Vapor-X 1eae 7901 RX-79XMERCB9 [SPEEDSTER MERC 310 RX 7900 XTX] 745e Navi 31 [Radeon Pro W7800] + 7460 7460 Navi32 GL-XL [AMD Radeon PRO V710] 7470 Navi 32 [Radeon PRO W7700] 747e Navi 32 [Radeon RX 7700 XT / 7800 XT] - 7480 Navi 33 [Radeon RX 7700S/7600/7600S/7600M XT/PRO W7600] + 7480 Navi 33 [Radeon RX 7600/7600 XT/7600M XT/7600S/7700S / PRO W7600] 1849 5313 RX 7600 Challenger OC 7483 Navi 33 [Radeon RX 7600M/7600M XT] 7489 Navi 33 [Radeon Pro W7500] @@ -5460,10 +5485,12 @@ 1849 43c8 Fatal1ty X370 Professional Gaming 43b6 X399 Series Chipset SATA Controller 43b7 300 Series Chipset SATA Controller + 43b8 A320 Chipset SATA Controller [AHCI mode] 43b9 X370 Series Chipset USB 3.1 xHCI Controller 1849 43d0 Fatal1ty X370 Professional Gaming 43ba X399 Series Chipset USB 3.1 xHCI Controller 43bb 300 Series Chipset USB 3.1 xHCI Controller + 43bc A320 USB 3.1 XHCI Host Controller 43c6 400 
Series Chipset PCIe Bridge 43c7 400 Series Chipset PCIe Port 43c8 400 Series Chipset SATA Controller @@ -9186,12 +9213,17 @@ 8717 PEX 8717 16-lane, 8-Port PCI Express Gen 3 (8.0 GT/s) Switch with DMA 8718 PEX 8718 16-Lane, 5-Port PCI Express Gen 3 (8.0 GT/s) Switch 8724 PEX 8724 24-Lane, 6-Port PCI Express Gen 3 (8 GT/s) Switch, 19 x 19mm FCBGA + 4c52 9234 LRNV9324 2-port Built-in 8643 NVMe Exchange Adapter + 4c52 9524 LRNV9524 2-port M.2 NVMe SSD Exchange Adapter 8725 PEX 8725 24-Lane, 10-Port PCI Express Gen 3 (8.0 GT/s) Multi-Root Switch with DMA 8732 PEX 8732 32-lane, 8-Port PCI Express Gen 3 (8.0 GT/s) Switch 8734 PEX 8734 32-lane, 8-Port PCI Express Gen 3 (8.0GT/s) Switch 8747 PEX 8747 48-Lane, 5-Port PCI Express Gen 3 (8.0 GT/s) Switch + 4c52 9347 LRNV9347L 2-port Built-in 8643 NVMe Switching Adapter + 4c52 9547 LRNV9547 4-port M.2 NVMe SSD Exchange Adapter 8748 PEX 8748 48-Lane, 12-Port PCI Express Gen 3 (8 GT/s) Switch, 27 x 27mm FCBGA 8749 PEX 8749 48-Lane, 18-Port PCI Express Gen 3 (8.0 GT/s) Multi-Root Switch with DMA + 4c52 9349 LRNV9349 8-port SFF-8643 NVMe SSD Exchange Adapter 87a0 PEX PCI Express Switch NT0 Port Link Interface 87a1 PEX PCI Express Switch NT1 Port Link Interface 87b0 PEX PCI Express Switch NT0 Port Virtual Interface @@ -12959,6 +12991,8 @@ 2296 Tegra PCIe Endpoint Virtual Network 22a3 GH100 [H100 NVSwitch] 22ba AD102 High Definition Audio Controller + 22bc AD104 High Definition Audio Controller + 22bd AD106M High Definition Audio Controller 2302 GH100 2313 GH100 [H100 CNX] 2321 GH100 [H100L 94GB] @@ -13080,6 +13114,7 @@ 2681 AD102 [RTX TITAN Ada] 2684 AD102 [GeForce RTX 4090] 2685 AD102 [GeForce RTX 4090 D] + 2689 AD102 [GeForce RTX 4070 Ti SUPER] 26b1 AD102GL [RTX 6000 Ada Generation] 26b2 AD102GL [RTX 5000 Ada Generation] 26b3 AD102GL [RTX 5880 Ada Generation] @@ -13299,6 +13334,7 @@ 8043 LANai4.x [Myrinet LANai interface chip] 8062 S5933_PARASTATION 807d S5933 [Matchmaker] + 8081 GPIB interface card [IOtech Inc. 
PCI488] 8088 Kongsberg Spacetec Format Synchronizer 8089 Kongsberg Spacetec Serial Output Board 809c S5933_HEPC3 @@ -16238,7 +16274,9 @@ 11ae Aztech System Ltd 11af Avid Technology Inc. 0001 Cinema + ee21 Digidesign DSP Farm ee40 Digidesign Audiomedia III + ee60 Digidesign SampleCell II / II Plus 11b0 V3 Semiconductor Inc. 0002 V300PSC 0292 V292PBC [Am29030/40 Bridge] @@ -17445,7 +17483,8 @@ 10a9 8002 Acenic Gigabit Ethernet 12ae 0002 Gigabit Ethernet-T (3C986-T) 00fa Farallon PN9100-T Gigabit Ethernet -12af TDK USA Corp +12af TDK Corporation + 5831 GBDriver GX1 x2 NVMe SSD Controller (DRAM-less) 12b0 Jorge Scientific Corp 12b1 GammaLink 12b2 General Signal Networks @@ -18916,6 +18955,9 @@ 580b Secure Flash Controller (Xenon) 580d System Management Controller (Xenon) 5811 Xenos GPU (Xenon) + 5821 Xenos GPU (Zephyr/Falcon) + 5831 Xenos GPU (Jasper) + 5841 Xenos GPU (Slim) 1415 Oxford Semiconductor Ltd 8401 OX9162 Mode 1 (8-bit bus) 8403 OX9162 Mode 0 (parallel port) @@ -19859,6 +19901,7 @@ 144d a801 SM963 2.5" NVMe PCIe SSD a806 NVMe SSD SM0032L a808 NVMe SSD Controller SM981/PM981/PM983 +# Used by different variants of SSD 970 EVO and PRO 144d a801 SSD 970 EVO/PRO 1d49 403b Thinksystem U.2 PM983 NVMe SSD a809 NVMe SSD Controller 980 (DRAM-less) @@ -19886,7 +19929,7 @@ # Actually 88SS1322 according to techpowerup a80b NVMe SSD Controller PM9B1 (DRAM-less) a80c NVMe SSD Controller S4LV008[Pascal] - a80d NVMe SSD Controller PM9C1a + a80d NVMe SSD Controller PM9C1a (DRAM-less) a820 NVMe SSD Controller 171X 1028 1f95 Express Flash NVMe XS1715 SSD 400GB 1028 1f96 Express Flash NVMe XS1715 SSD 800GB @@ -20844,6 +20887,7 @@ 14e4 5250 NetXtreme-E BCM57504 4x25G KR Mezz 14e4 5425 NetXtreme-E Quad-port 25G SFP28 Ethernet OCP 3.0 Adapter (BCM957504-N425G) 14e4 d142 NetXtreme-E P425D BCM57504 4x25G SFP28 PCIE + 1590 0420 HPE Ethernet 25/50Gb 2-port 6310C Adapter 1752 BCM57502 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet 1760 BCM57608 10Gb/25Gb/50Gb/100Gb/200Gb/400Gb Ethernet 14e4 
d125 BCM57608 2x200G PCIe Ethernet NIC @@ -21908,11 +21952,13 @@ 0262 MT27710 [ConnectX-4 Lx Programmable] EN 0263 MT27710 [ConnectX-4 Lx Programmable Virtual Function] EN 0264 Innova-2 Flex Burn image - 0270 Spectrum-4L, Flash recovery mode + 0270 Spectrum-5 in Flash Recovery Mode 0271 Spectrum-4L, RMA - 0274 Spectrum-4C, Flash recovery mode + 0274 Spectrum-6 in Flash Recovery Mode 0275 Spectrum-4C RMA 0277 Spectrum-4TOR RMA + 0278 Quantum-4 in Flash Recovery Mode + 0279 Quantum-4 RMA 0281 NPS-600 Flash Recovery 0282 ArcusE Flash recovery 0283 ArcusE RMA @@ -22132,6 +22178,7 @@ d2f2 Quantum-2 NDR (400Gbps) switch d2f4 Quantum-3 d2f6 Quantum-3CPO + d2f8 Quantum-4 15b4 CCI/TRIAD 15b5 Cimetrics Inc 15b6 Texas Memory Systems Inc @@ -23338,7 +23385,8 @@ 1108 IPQ95xx/97xx PCIe Root Port 1109 QCN62xx/92xx Wireless Network Adapter 17cc NetChip Technology, Inc - 2280 USB 2.0 + 2280 NET2280 PCI to USB 2.0 Hi-Speed Peripheral Controller + 2282 NET2282 PCI to USB 2.0 Hi-Speed Peripheral Controller 17cd Cadence Design Systems, Inc. 17cf Z-Com, Inc. 17d3 Areca Technology Corp. @@ -23925,6 +23973,7 @@ 0013 SH7757 PCIe Switch [PS] 0014 uPD720201 USB 3.0 Host Controller 0015 uPD720202 USB 3.0 Host Controller + 4c52 9a72 LRSU9A72 2-Port USB 3.0 Exchange Adapter 001a SH7758 PCIe-PCI Bridge [PPB] 001b SH7758 PCIe End-Point [PBI] 001d SH7758 PCIe Switch [PS] @@ -24087,6 +24136,8 @@ 1942 ClearSpeed Technology plc e511 Advance X620 accelerator card e521 Advance e620 accelerator card +1945 MERA + 6200 PXI/PXIe measurement module 1947 C-guys, Inc. 4743 CG200 Dual SD/SDIO Host controller device 1948 Alpha Networks Inc. 
@@ -24184,6 +24235,7 @@ 7010 MPC8641 PCI Host Bridge 7011 MPC8641D PCI Host Bridge 7018 MPC8610 + 81c0 LS1046A PCI Express Bridge c006 MPC8308 1a56 1201 Bigfoot Killer E2100 Gigabit Ethernet Controller # PCIe interface for emulator @@ -24773,6 +24825,7 @@ 1050 Virtio 1.0 GPU 1052 Virtio 1.0 input 1053 Virtio 1.0 socket + 1058 virtio-mem 105a Virtio file system 1110 Inter-VM shared memory 1af4 1100 QEMU Virtual Machine @@ -24797,6 +24850,7 @@ 0612 ASM1061/ASM1062 Serial ATA Controller 1849 0612 Motherboard 0622 ASM106x Serial ATA AHCI Controller + 4c52 9661 LRST9661 2-port M.2 SATA3(6Gb/s) Raid Adapter 0624 ASM106x SATA/RAID Controller 0625 106x SATA/RAID Controller 1040 ASM1040 SuperSpeed USB Host Controller @@ -24818,6 +24872,7 @@ 1187 ASM1187e 7-Port PCIe x1 Gen2 Packet Switch 118f ASM1187e 7-Port PCIe x1 Gen2 Packet Switch 1242 ASM1142 USB 3.1 Host Controller + 4c52 9a42 LRSU9A42 2-Port Type-A Exchange Adapter 1343 ASM1143 USB 3.1 Host Controller 1806 ASM1806 4-Port PCIe x2 Gen2 Packet Switch 1812 ASM1812 6-Port PCIe x4 Gen2 Packet Switch @@ -24907,10 +24962,13 @@ 1028 2113 BOSS-N1 Modular 1028 2151 BOSS-N1 Modular ET 1028 2196 ROR-N1 + 1028 2286 BOSS-N1 DC-MHS + 1028 2287 BOSS-N1 Modular 1b4b 2241 Santa Cruz NVMe Host Adapter 1b96 4000 WD_BLACK AN1500 NVMe SSD 1d49 0306 ThinkSystem M.2 NVMe 2-Bay RAID Enablement Kit 1d49 0307 ThinkSystem 7mm NVMe 2-Bay Rear RAID Enablement Kit + 4c52 9541 LRNV9541 2-port M.2 NVMe Raid Adapter 2b42 88W8997 2.4/5 GHz Dual-Band 2x2 Wi-Fi® 5 (802.11ac) + Bluetooth® 5.3 Solution 2b43 NXP 88W9098 Wi-Fi 6 (ax) MAC #1 2b44 NXP 88W9098 Wi-Fi 6 (ax) MAC #2 @@ -24919,6 +24977,7 @@ 9123 88SE9123 PCIe SATA 6.0 Gb/s controller dc93 600e DC-6xxe series SATA 6G controller 9125 88SE9125 PCIe SATA 6.0 Gb/s controller + 4c52 9615 LRST9615 4-port SATA3(6Gb/s) Exchange Adapter 9128 88SE9128 PCIe SATA 6 Gb/s RAID controller 9130 88SE9128 PCIe SATA 6 Gb/s RAID controller with HyperDuo 1043 8438 P8P67 Deluxe Motherboard @@ -24947,6 +25006,7 @@ 1d49 
0303 ThinkSystem SE350 M.2 SATA 4-Bay Data RAID Mirroring Enablement Kit 1d49 0304 ThinkSystem M.2 SATA 2-Bay RAID Enablement Kit 1d49 0305 ThinkSystem 7mm SATA 2-Bay Rear RAID Enablement Kit + 4c52 9630 LRST9630 4-port SATA3(6Gb/s) Raid Adapter 9235 88SE9235 PCIe 2.0 x2 4-port SATA 6 Gb/s Controller 9445 88SE9445 PCIe 2.0 x4 4-Port SAS/SATA 6 Gbps RAID Controller 9480 88SE9480 SAS/SATA 6Gb/s RAID controller @@ -24960,6 +25020,21 @@ # 2xHDMI and 2xHD-SDI inputs e5f4 MPEG2 and H264 Encoder-Transcoder f1c4 Dual ASI-RX/TX-CI card +1b5e STAR-Dundee Ltd. + 0001 SpaceWire PCI Mk2 + 0002 SpaceWire PCIe Mk1 + 0003 SpaceWire cPCI Mk2 + 0004 SpaceWire PXI Recorder Mk1 + 0005 SpaceWire PXI Interface Mk1 + 0006 SpaceWire PXI Interface Mk1 with RMAP Target + 0008 SpaceWire PXI Router Mk1 + 000b SpaceWire PXI Interface Mk2 + 000c SpaceWire PXI Interface Mk2 with RMAP Target + 000d SpaceWire PXI Router Mk2 + 000e SpaceWire PXI Recorder Mk2 + 0100 STAR-Ultra PCIe + 0102 STAR-Ultra Single-Lane Router + 0200 SpaceWire PCIe Mk2 1b61 Byd Precision Manufacture Co.,Ltd 1b66 DELTACAST 0007 DELTA-3G-elp-d @@ -25121,11 +25196,14 @@ 1bb1 0179 Nytro 5360S - E3.S # Nytro 5360S (Rocinante Single Port) TCG - E3.S 1bb1 0180 Nytro 5360S TCG - E3.S +# Nytro 5060H (Rocinante High Performance) non-SED + 1bb1 0181 Nytro 5060H 1bb1 01a1 Nytro XP7102 5012 FireCuda/IronWolf 510 SSD 5013 BarraCuda Q5 NVMe SSD (DRAM-less) 5016 FireCuda 520/IronWolf 525 SSD 5018 FireCuda 530 SSD + 5019 BarraCuda PCIe SSD (DRAM-less) # 2TB 5021 FireCuda 520 SSD # 1TB @@ -25315,6 +25393,7 @@ 0023 Ultrastar SN200 Series NVMe SSD 1c58 8823 Ultrastar Memory (ME200) 1c5c SK hynix + 1069 PCB01 NVMe Solid State Drive 1282 PC300 NVMe Solid State Drive 128GB 1283 PC300 NVMe Solid State Drive 256GB 1284 PC300 NVMe Solid State Drive 512GB @@ -25525,6 +25604,7 @@ 5762 FALCON, GAMMIX S41, SPECTRIX S40G NVMe SSD (DRAM-less) 5763 XPG GAMMIX S5 NVMe SSD (DRAM-less) 5766 XPG GAMMIXS1 1L, XPG GAMMIX S5, LEGEND 710 / 740, SWORDFISH NVMe SSD 
(DRAM-less) + 5772 LEGEND 850 LITE NVMe SSD (DRAM-less) 612a LEGEND 750 NVMe SSD (DRAM-less) 613a ATOM 50, LEGEND 840 NVMe SSD (DRAM-less) 621a LEGEND 850 NVMe SSD (DRAM-less) @@ -25565,6 +25645,7 @@ 6304 AM630 PCIe 4.0 NVMe SSD 1024GB 6a02 AM6A0 PCIe 4.0 NVMe SSD 256GB 6a03 RPETJ512MKP1QDQ PCIe 4.0 NVMe SSD 512GB (DRAM-less) + 6a13 RPJYJ512MKN1QWQ PCIe 4.0 NVMe SSD 512GB (DRAM-less) 6a14 RPEYJ1T24MKN2QWY PCIe 4.0 NVMe SSD 1024GB (DRAM-less) 8030 NVMe SSD Controller UH8X2X/UH7X2X series 1cc4 1122 NVMe SSD UH812a U.2 1.92TB @@ -25662,7 +25743,7 @@ 071a KX-5000/KX-6000/KX-6000G/KH-40000 PCI Express Root Port 071b KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port 071c KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port - 071d KX-5000/KX-6000/KX-6000G/KH-40000 PCI Express Root Port + 071d KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port 071e KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port 071f ZX-200 Upstream Port of PCI Express Switch 0720 ZX-200 PCIE RC6 controller @@ -25851,6 +25932,9 @@ 1024 AR-TK242 [2x10GbE Packet Capture Device] 1025 AR-TK242-FX2 [2x100GbE Gen5 Packet Capture Device] 1026 AR-TK242-FX2 [1x200GbE Gen5 Packet Capture Device] + 1027 AR-P2P-DBG [P2P Debug Function] + 1028 AR-P2P-ATR [P2P Actor Function] + 1029 AR-P2P-UTL [P2P Utility Function] 4200 A5PL-E1-10GETI [10 GbE Ethernet Traffic Instrument] 1d72 Xiaomi 1d78 DERA Storage @@ -25979,6 +26063,7 @@ 1062 Lexar NM710 NVME SSD 1160 FORESEE P900 BGA NVMe SSD (DRAM-less) 1202 Lexar NM610 PRO NVME SSD (DRAM-less) + 12e4 ORCA 4836 Series eSSD 1602 Lexar NM790 NVME SSD (DRAM-less) 1d97 Lexar NM620 NVME SSD (DRAM-less) 2263 SM2263EN/SM2263XT-based OEM NVME SSD (DRAM-less) @@ -26058,6 +26143,9 @@ 1dbe 2006 Dongting-N2 DC SSD U.2 7680GB 1dbe 3001 Donghu-Z2 DC ZNS SSD U.2 4000GB 1dbe 3002 Donghu-Z2 DC ZNS SSD U.2 8000GB + 5666 NVMe SSD Controller IG5666 + 5668 NVMe SSD Controller IG5668 + 5669 NVMe SSD Controller IG5669 [Tacoma] 1dbf Guizhou Huaxintong 
Semiconductor Technology Co., Ltd 0401 StarDragon4800 PCI Express Root Port 1dc2 Alco Digital Devices Limited @@ -26410,6 +26498,7 @@ 1df8 d100 M.2 NVMe SSD 1df8 d201 M.2 NVMe SSD 1df8 d600 M.2 NVMe SSD +1dfa Astera Labs, Inc. 1dfc JSC NT-COM 1181 TDM 8 Port E1/T1/J1 Adapter 1e0d SambaNova Systems, Inc @@ -26595,10 +26684,12 @@ 1e3b 0069 Enterprise NVMe SSD U.2 3.20TB (R5301D) 1e3b 006c Enterprise NVMe SSD U.2 1.92TB (R5101) 1e3b 006d Enterprise NVMe SSD U.2 1.60TB (J5301) - 1e3b 00b9 Enterprise NVMe SSD U.2 QDP 25.60TB (R5300) - 1e3b 00be Enterprise NVMe SSD U.2 QDP 30.72TB (R5100) - 1e3b 00c1 Enterprise NVMe SSD U.2 QDP 25.60TB (R5300D) - 1e3b 00c4 Enterprise NVMe SSD U.2 QDP 30.72TB (R5100D) + 1e3b 00b9 Enterprise NVMe SSD U.2 ODP 25.60TB (R5301)/(J5301) + 1e3b 00be Enterprise NVMe SSD U.2 ODP 30.72TB (R5101)/(J5101) + 1e3b 00c1 Enterprise NVMe SSD U.2 ODP 25.60TB (R5301D)/(J5301D) + 1e3b 00c4 Enterprise NVMe SSD U.2 ODP 30.72TB (R5101D)/(J5101D) + 1e3b 00c7 Enterprise NVMe SSD U.2 ODP 25.60TB (J5300) + 1e3b 00c8 Enterprise NVMe SSD U.2 ODP 30.72TB (J5100) 1e3b 00c9 Enterprise NVMe SSD U.2 ODP 15.36TB (J5001) 1e3b 00ca Enterprise NVMe SSD U.2 ODP 3.84TB (J5102) 1e3b 00cb Enterprise NVMe SSD U.2 ODP 7.68TB (J5102) @@ -26609,12 +26700,16 @@ 1e3b 00dc Enterprise NVMe SSD U.2 ODP 30.72TB with SAMSUNG 32GB DRAM (J5001) 1e3b 00dd Enterprise NVMe SSD U.2 ODP 30.72TB with MT 32GB DRAM(J5001) 1e3b 00de Enterprise NVMe SSD U.2 ODP 15.36TB with SK 16GB DRAM(J5001D) - 1e3b 00df Enterprise NVMe SSD U.2 ODP 30.72TB with SAMSUNG 32GB DRAM(J5001D) + 1e3b 00df Enterprise NVMe SSD U.2 ODP 30.72TB with SAMSUNG 32GB DRAM(J5001) 1e3b 00e7 Enterprise NVMe SSD U.2 ODP 30.72TB with MT 32GB DRAM(J5001D) 1e3b 00e8 Enterprise NVMe SSD U.2 QDP 3.20TB (J5301) 1e3b 00e9 Enterprise NVMe SSD U.2 ODP 6.40TB (J5301) 1e3b 00ea Enterprise NVMe SSD U.2 QDP 3.20TB (J5301D) 1e3b 00eb Enterprise NVMe SSD U.2 ODP 6.40TB (J5301D) + 1e3b 00ec Enterprise NVMe SSD U.2 ODP 30.72TB with MT 32GB DRAM(J5101) + 
1e3b 00ed Enterprise NVMe SSD U.2 ODP 30.72TB with MT 32GB DRAM(R5101) + 1e3b 00ee Enterprise NVMe SSD U.2 ODP 15.36B with SK 16GB DRAM(J5101) + 1e3b 00ef Enterprise NVMe SSD U.2 ODP 12.80TB with SK 16GB DRAM(J5301) 1e3b 00f0 Enterprise NVMe SSD U.2 0.40TB (X2900) 1e3b 00f1 Enterprise NVMe SSD U.2 0.80TB (X2900) 1e3b 00f2 Enterprise NVMe SSD U.2 1.60TB (X2900) @@ -26786,6 +26881,7 @@ 1001 Video Accelerator 1eb4 Quantum Nebula Microelectronics Technology Co.,Ltd. 3401 SSD Contoller +1eb6 Wuxi Stars Microsystem Technology Co., Ltd 1eb9 Senscomm Semiconductor, Inc 2020 SCM2625 Wi-Fi6 Network Adapter 1ebd EMERGETECH Company Ltd. @@ -26829,12 +26925,14 @@ 1eca Lightmatter 0000 Envise-B 1ed0 Hosin Global Electronics + 2283 Patriot P300 NVMe SSD (DRAM-less) 1ed2 FuriosaAI, Inc. 0000 Warboy 1111 RNGD 0000 1111 RNGD-S 0000 2222 RNGD VF 0000 3333 RNGD-S VF + 2222 RNGD-S 1ed3 Yeston 1ed5 Moore Threads Technology Co.,Ltd 0100 MTT S10 @@ -26918,6 +27016,7 @@ 1ef6 GrAI Matter Labs 1ef7 Shenzhen Gunnir Technology Development Co., Ltd 1efb Flexxon Pte Ltd +1eff Rebellions Inc. 
1f02 Beijing Dayu Technology 1f03 Shenzhen Shichuangyi Electronics Co., Ltd 1202 MAP1202-Based NVMe SSD (DRAM-less) @@ -26947,7 +27046,27 @@ 1a01 M16104 Family Virtual Function 1f0f 0001 M16104 Family Virtual Function 2022 D1055AS PCI Express Switch Upstream Port + 3403 M18110 Family + 3404 M18110 Lx Family + 3405 M18110 Family BASE-T + 3406 M18110 Lx Family BASE-T + 3407 M18110 Family OCP + 3408 M18110 Lx Family OCP + 3409 M18110 Family BASE-T OCP + 340a M18110 Lx Family BASE-T OCP + 340b M18120 Family + 340c M18120 Lx Family + 340d M18120 Family BASE-T + 340e M18120 Lx Family BASE-T + 340f M18120 Family OCP + 3410 M18120 Lx Family OCP + 3411 M18120 Family BASE-T OCP + 3412 M18120 Lx Family BASE-T OCP + 3413 M18100 Family Virtual Function 9088 D1055AS PCI Express Switch Downstream Port +1f16 XConn Technologies +# XConn XC50256 CXL2.0/PCIe5.0 switch + c500 XC50256 1f17 Zettastone Technology 1f24 xFusion Digital Technologies Co., Ltd. 1058 EP500/EP600 NVMe SSD @@ -27148,6 +27267,25 @@ 1fe4 0077 Enterprise NVMe SSD U.2 6.40TB(HP630) 1fe4 0078 Enterprise NVMe SSD U.2 3.20TB(HP630) 1fe9 MemryX +# LinkData Technology (Tianjin) Co., LTD +1ff2 Linkdata + 10a1 NIC1160 Ethernet Controller Family + 1ff2 0c11 10GE Ethernet Adapter 1160-2X + 10a2 NIC1160 Ethernet Controller Virtual Function Family + 20a1 IOC2110 Storage Controller + 1ff2 0a11 2120-16i SATA3/SAS3 HBA Adapter + 1ff2 0a12 2120-8i SATA3/SAS3 HBA Adapter + 20a2 IOC2250 Storage Controller + 1ff2 0a21 2230-18i Tri-mode HBA Adapter + 1ff2 0a22 2230-10i Tri-mode HBA Adapter + 1ff2 0a23 2230-16i Tri-mode HBA Adapter + 1ff2 0a24 2230-8i Tri-mode HBA Adapter + 1ff2 0a28 2233-16i Tri-mode HBA Adapter + 30a2 ROC3250 Storage Controller + 1ff2 0b21 3260-18i Tri-mode RAID Adapter + 1ff2 0b22 3260-10i Tri-mode RAID Adapter + 1ff2 0b23 3260-16i Tri-mode RAID Adapter + 1ff2 0b24 3260-8i Tri-mode RAID Adapter 1ff4 DEEPX Co., Ltd. 
0000 DX_M1 0001 DX_M1A @@ -27600,10 +27738,8 @@ 4c52 LR-LINK 1001 Smart Network Adapter 4c52 a008 LREG1008PT Single-port 1Gb Smart Ethernet Network Adapter - 4c52 a009 LREG1009PT Single-port 2.5Gb Smart Ethernet Network Adapter 1002 Smart Network Adapter 4c52 a006 LREG1006PT Single-port 1.2Gb Network Security Isolation Adapter - 4c52 a007 LREG1007PT Quad-port 10Gb Smart Ethernet Network Adapter 1003 Smart Network Adapter 1004 Smart Network Adapter 4c52 b010 LREG1010PF Single-port 10Gb FPGA Network Security Isolation Adapter @@ -27667,6 +27803,9 @@ 50b2 TerraTec Electronic GmbH 50ce System-on-Chip Engineering S.L. 0001 RELY-MIL-XMC-TSN-SWITCH + 0100 XMC_AV-Dual-ETH + 0101 XMC_AV-ETSN + 0102 XMC_AV-AFDX 5136 S S Technologies 5143 Qualcomm Inc 5145 Ensoniq (Old) @@ -29738,7 +29877,12 @@ 125d Ethernet Controller I226-IT 12d1 Ethernet Controller E830-CC for backplane 12d2 Ethernet Controller E830-CC for QSFP + 8086 0002 Ethernet Network Adapter E830-C-Q2 for OCP 3.0 + 8086 0004 Ethernet Network Adapter E830-CC-Q1 for OCP 3.0 12d3 Ethernet Controller E830-CC for SFP + 8086 0001 Ethernet Network Adapter E830-XXV-2 for OCP 3.0 + 8086 0003 Ethernet Network Adapter E830-XXV-2 + 8086 0004 Ethernet Network Adapter E830-XXV-4 for OCP 3.0 12d4 Ethernet Controller E830-CC for SFP-DD 12d5 Ethernet Controller E830-C for backplane 12d8 Ethernet Controller E830-C for QSFP @@ -30368,6 +30512,7 @@ 15fc Ethernet Connection (13) I219-V 15ff Ethernet Controller X710 for 10GBASE-T 1014 0000 PCIe3 4-port 10GbE Base-T Adapter + 108e 7b1f Quad Port 10GBase-T Adapter - CP 1137 0000 X710TLG GbE RJ45 PCIe NIC 1137 02c1 X710T2LG 2x10 GbE RJ45 PCIe NIC 1137 02c2 X710T4LG 4x10 GbE RJ45 PCIe NIC @@ -34334,6 +34479,7 @@ 37d9 X722 Hyper-V Virtual Function 3882 Ice Lake LPC Controller 38a4 Ice Lake SPI Controller + 38c8 Ice Lake-LP Smart Sound Technology Audio Controller 38e0 Ice Lake Management Engine Interface 3a00 82801JD/DO (ICH10 Family) 4-port SATA IDE Controller 3a02 82801JD/DO (ICH10 Family) 
SATA AHCI Controller @@ -34932,6 +35078,7 @@ 4641 12th Gen Core Processor Host Bridge/DRAM Registers 1028 0b10 Precision 3571 464d 12th Gen Core Processor PCI Express x4 Controller #0 + 464e Alder Lake-N Thunderbolt 4 USB Controller 464f 12th Gen Core Processor Gaussian & Neural Accelerator 1028 0b10 Precision 3571 4650 12th Gen Core Processor Host Bridge @@ -34978,8 +35125,13 @@ 4908 DG1 [Iris Xe Graphics] 4909 DG1 [Iris Xe MAX 100] 4940 4xxx Series QAT - 4942 4xxx Series QAT - 4944 4xxx Series QAT + 4941 4xxx Series QAT Virtual Function + 4942 401xx Series QAT + 4943 401xx Series QAT Virtual Function + 4944 402xx Series QAT + 4945 402xx Series QAT Virtual Function + 4946 420xx Series QAT + 4947 420xx Series QAT Virtual Function 4b00 Elkhart Lake eSPI Controller 4b23 Elkhart Lake SMBus Controller 4b24 Elkhart Lake SPI (Flash) Controller @@ -35107,6 +35259,7 @@ 51b0 Alder Lake PCI Express Root Port #9 51b1 Alder Lake PCI Express x1 Root Port #10 51bb Alder Lake-P PCH PCIe Root Port #4 + 51bd Alder Lake-P PCH PCIe Root Port #6 51bf Alder Lake PCH-P PCI Express Root Port #9 51c5 Alder Lake-P Serial IO I2C Controller #0 51c6 Alder Lake-P Serial IO I2C Controller #1 @@ -35166,7 +35319,15 @@ 8086 0001 EtherExpress PRO/100 Server Ethernet Adapter 530d 80310 (IOP) IO Processor 5481 Alder Lake-N PCH eSPI Controller + 54a3 Alder Lake-N SMBus + 54a4 Alder Lake-N SPI (flash) Controller + 54a8 Alder Lake-N Serial IO UART Host Controller + 54b0 Alder Lake-N PCI Express Root Port #9 + 54b1 Alder Lake-N PCI Express Root Port #10 + 54b2 Alder Lake-N PCI Express Root Port #11 + 54b3 Alder Lake-N PCI Express Root Port #12 54c8 Alder Lake-N PCH High Definition Audio Controller + 54d3 Alder Lake-N SATA AHCI Controller 54e0 Alder Lake-N PCH HECI Controller 54ed Alder Lake-N PCH USB 3.2 xHCI Host Controller 54ef Alder Lake-N PCH Shared SRAM @@ -35205,7 +35366,7 @@ 56bf DG2 [Arc Graphics A580E] 56c0 ATS-M [Data Center GPU Flex 170] 56c1 ATS-M [Data Center GPU Flex 140] - 56c2 ATS-M [Data 
Center GPU Flex 170G] + 56c2 ATS-M [Data Center GPU Flex 170V] 5780 Thunderbolt 80/120G Bridge [Barlow Ridge Host 80G 2023] 5781 Thunderbolt 80/120G NHI [Barlow Ridge Host 80G 2023] 5782 Thunderbolt 80/120G USB Controller [Barlow Ridge Host 80G 2023] @@ -35219,9 +35380,12 @@ 579e Ethernet Connection E825-C for SFP 57a4 Thunderbolt Bridge [Barlow Ridge Hub 40G 2023] 57a5 Thunderbolt USB Controller [Barlow Ridge Hub 40G 2023] + 57ae Ethernet Controller E610 Backplane + 57af Ethernet Controller E610 SFP 57b0 Ethernet Controller E610 10GBASE T 57b1 Ethernet Controller E610 2.5GBASE T 8086 0000 Ethernet Converged Network Adapter E610 + 57b2 Ethernet Controller E610 SGMII 5845 QEMU NVM Express Controller 1af4 1100 QEMU Virtual Machine 5900 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers @@ -35600,6 +35764,7 @@ 7a27 Raptor Lake-S PCH Shared SRAM 7a30 Raptor Lake PCI Express Root Port #9 7a38 Raptor Lake PCI Express Root Port #1 + 7a3a Raptor Point-S PCH - PCI Express Root Port 3 7a3b Raptor Lake PCI Express Root Port #4 7a40 Raptor Lake PCI Express Root Port #17 7a44 Raptor Lake PCI Express Root Port #21 @@ -35646,7 +35811,10 @@ 8086 0094 Wi-Fi 6 AX201 160MHz 7afc Alder Lake-S PCH Serial IO I2C Controller #4 7afd Alder Lake-S PCH Serial IO I2C Controller #5 + 7d03 Meteor Lake-P Dynamic Tuning Technology 7d0b Volume Management Device NVMe RAID Controller Intel Corporation + 7d0d Meteor Lake-P Platform Monitoring Technology + 7d19 Meteor Lake IPU 7d1d Meteor Lake NPU 7d40 Meteor Lake-M [Intel Graphics] 7d41 Arrow Lake-U [Intel Graphics] @@ -35668,10 +35836,15 @@ 7e30 Meteor Lake-P Serial IO SPI Controller #1 7e40 Meteor Lake PCH CNVi WiFi 8086 0094 Wi-Fi 6E AX211 160MHz +# Refer from Intel Meteor Lake EDS (doc#640228) under its "Device IDs" section. 
+ 7e45 Meteor Lake-P Integrated Sensor Hub 7e46 Meteor Lake-P Serial IO SPI Controller #2 + 7e4c Meteor Lake-P Gaussian & Neural-Network Accelerator 7e50 Meteor Lake-P Serial IO I2C Controller #4 7e51 Meteor Lake-P Serial IO I2C Controller #5 7e52 Meteor Lake-P Serial IO UART Controller #2 + 7e70 Meteor Lake-P CSME HECI #1 + 7e73 Meteor Lake-P Keyboard and Text (KT) Redirection 7e78 Meteor Lake-P Serial IO I2C Controller #0 7e79 Meteor Lake-P Serial IO I2C Controller #1 7e7a Meteor Lake-P Serial IO I2C Controller #2 @@ -36758,6 +36931,7 @@ a72f Raptor Lake-P Thunderbolt 4 PCI Express Root Port #2 a73e Raptor Lake-P Thunderbolt 4 NHI #0 1028 0c06 Precision 3580 + a740 Raptor Lake-S 8+12 - Host Bridge/DRAM Controller a74d Raptor Lake PCIe 4.0 Graphics Port a74f GNA Scoring Accelerator module 1028 0c06 Precision 3580 @@ -36851,6 +37025,11 @@ d156 Core Processor Semaphore and Scratchpad Registers d157 Core Processor System Control and Status Registers d158 Core Processor Miscellaneous Registers + e202 Battlemage G21 [Intel Graphics] + e20b Battlemage G21 [Intel Graphics] + e20c Battlemage G21 [Intel Graphics] + e20d Battlemage G21 [Intel Graphics] + e212 Battlemage G21 [Intel Graphics] f1a5 SSD 600P Series 8086 390a SSDPEKKW256G7 256GB f1a6 SSD DC P4101/Pro 7600p/760p/E 6100p Series @@ -36921,7 +37100,7 @@ 0119 WX1860-LC Gigabit Ethernet Controller Virtual Function 011a WX1860A1 Gigabit Ethernet Controller Virtual Function 011b WX1860AL1 Gigabit Ethernet Controller Virtual Function - 1000 Ethernet Controller RP1000 Virtual Function for 10GbE SFP+ + 1000 Ethernet Controller SP1000A Virtual Function for 10GbE SFP+ 1001 Ethernet Controller SP1000A for 10GbE SFP+ 1bd4 0084 Ethernet Controller SP1000A for 10GbE SFP+(lldp) 1bd4 0085 Ethernet Controller SP1000A for 10GBASE-T @@ -36931,7 +37110,7 @@ 8088 0000 Ethernet Network Adaptor RP1000 for 10GbE SFP+ 8088 0300 Ethernet Network Adaptor RP1000-A03 for 10GbE SFP+ 8088 0400 Ethernet Network Adaptor RP1000-A04 for 10GbE SFP+ - 
2000 Ethernet Controller RP2000 Virtual Function for 10GbE SFP+ + 2000 Ethernet Controller WX1820AL Virtual Function for 10GbE SFP+ 2001 Ethernet Controller WX1820AL for 10GbE SFP+ 8088 2000 Ethernet Network Adaptor RP2000 for 10GbE SFP+ 8088 2300 Ethernet Network Adaptor RP2000-A03 for 10GbE SFP+ @@ -36943,7 +37122,17 @@ 8384 SigmaTel 8401 TRENDware International Inc. 8510 Sietium Semiconductor Co., Ltd. - 0201 GenBu02 [GB2062-PCIe-C0] + 0201 GenBu02 Series GPU + 8510 0001 GB2062-PUB-LPDDR + 8510 0002 GB2062-PCIe-C0 + 8510 0003 GB2062-PCIe-C41 + 8510 0004 GB2062-PCIe-HIEILP4 + 8510 0005 CQ2040-PCIe-C21 + 8510 0007 GB2062-PCIe-C40 + 8510 0008 CQ2040-MXM-M60 + 8510 0009 GB2062-PCIe-C20 + 8510 000c CQ2040-PUB + 8510 0201 GB2062-PUB-DDR # nee ScaleMP 8686 SAP 1010 vSMP Foundation controller [vSMP CTL] @@ -37316,6 +37505,9 @@ 103c 1101 Smart Array P416ie-m SR G10 105b 1211 HBA 8238-16i 105b 1321 HBA 8242-24i + 1137 02f8 24G TriMode M1 RAID 4GB FBWC 32D + 1137 02f9 24G TriMode M1 RAID 4GB FBWC 16D + 1137 02fa 24G TriMode M1 HBA 16D 13fe 8312 SKY-9200 MIC-8312BridgeB 152d 8a22 QS-8204-8i 152d 8a23 QS-8238-16i diff --git a/share/misc/usb_vendors b/share/misc/usb_vendors index a1e3ea4acd1d..41b367d1a0d6 100644 --- a/share/misc/usb_vendors +++ b/share/misc/usb_vendors @@ -9,8 +9,8 @@ # The latest version can be obtained from # http://www.linux-usb.org/usb.ids # -# Version: 2024.03.18 -# Date: 2024-03-18 20:34:02 +# Version: 2024.07.04 +# Date: 2024-07-04 20:34:02 # # Vendors, devices and interfaces. Please keep sorted. @@ -2400,6 +2400,7 @@ 02e3 Xbox One Elite Controller 02e6 Xbox Wireless Adapter for Windows 02ea Xbox One Controller + 02f3 Xbox One Chatpad 02fd Xbox One S Controller [Bluetooth] 02fe Xbox Wireless Adapter for Windows 0306 Surface Pro 7 SD Card Reader @@ -4992,7 +4993,7 @@ 0a28 INDI AV-IN Device 1301 Network Controller 1302 i3 Gateway - 1303 3 Micro Module + 1303 i3 Micro Module 1304 i3 Module 1305 i3 Multi Sensing Module 04c1 U.S. 
Robotics (3Com) @@ -6433,6 +6434,7 @@ 2060 PT-E550W P-touch Label Printer 2061 PT-P700 P-touch Label Printer 2064 PT-P700 P-touch Label Printer RemovableDisk + 2065 PT-P750W P-Touch Label Writer 2074 PT-D600 P-touch Label Printer 209b QL-800 Label Printer 209c QL-810W Label Printer @@ -7439,6 +7441,7 @@ 03dd PTH-460 [Intuos Pro BT (S)] tablet 03ec DTH134 [DTH134] touchscreen 03ed DTC121 [DTC121] touchscreen + 03f0 DTH135 [Movink 13] 0400 PenPartner 4x5 4001 TPC4001 4004 TPC4004 @@ -9049,6 +9052,7 @@ 0752 micros Reader 0760 USB 2.0 Card Reader/Writer 0761 Genesys Mass Storage Device + 0769 SPR2801S [Lightspeeur 2801] 0780 USBFS DFU Adapter 07a0 Pen Flash 0880 Wasp (SL-6612) @@ -10898,7 +10902,7 @@ 0056 Agfa AP1100 Photo Printer 005d Mobile Mass Storage 005f Laser Pro LL [MFPrinter] - 0062 XG-76NA 802.11bg + 0062 XG-76NA / XG-760N 802.11b/g Wireless adapter 0078 Laser Pro Monochrome MFP 079d Alfadata Computer Corp. 0201 GamePort Adapter @@ -11169,6 +11173,7 @@ 1228 MPEG-2 Capture Device (M038) 1830 AVerTV Volar Video Capture (H830) 1871 TD310 DVB-T/T2/C dongle + 2553 Live Gamer Ultra 2.1 3835 AVerTV Volar Green HD (A835B) 850a AverTV Volar Black HD (A850) 850b AverTV Red HD+ (A850T) @@ -11180,6 +11185,7 @@ b300 A300 DVB-T TV receiver b800 MR800 FM Radio c039 DVD EZMaker 7 + d553 Live Gamer Ultra Pro-RGB e880 MPEG-2 Capture Device (E880) e882 MPEG-2 Capture Device (E882) 07cb Kingmax Technology, Inc. 
@@ -12286,7 +12292,7 @@ 0a0b WLU5053 802.11abgn Wireless Module [Broadcom BCM43236B] 0a13 AX88179 Gigabit Ethernet [Toshiba] 0b05 PX1220E-1G25 External hard drive - 0b09 PX1396E-3T01 External hard drive + 0b09 PX139xE 3.5 External HDD 0b1a STOR.E ALU 2S 1300 Wireless Broadband (CDMA EV-DO) SM-Bus Minicard Status Port 1301 Wireless Broadband (CDMA EV-DO) Minicard Status Port @@ -12420,6 +12426,7 @@ 010f nanoKONTROL studio controller 0117 nanoKONTROL2 MIDI Controller 012f SQ-1 + 0154 NTS-1 digital kit mkII 0203 KRONOS 0f03 K-Series K61P MIDI studio controller 0945 Pasco Scientific @@ -13004,6 +13011,7 @@ 5803 BCM5880 Secure Applications Processor with secure keyboard 5804 BCM5880 Secure Applications Processor with fingerprint swipe sensor 5832 BCM5880 Secure Applications Processor Smartcard reader + 5843 BCM58200 ControlVault 3 (FingerPrint sensor + Contacted SmartCard) 6300 Pirelli Remote NDIS Device 6410 BCM20703A1 Bluetooth 4.1 + LE bd11 BCM4320 802.11bg Wireless Adapter @@ -13020,11 +13028,14 @@ 0009 LP2844 Printer 0027 ZTC LP2844-Z-200dpi 0050 P120i / WM120i + 0062 GK420d Label Printer + 0065 ZM400 Label Printer 0080 GK420d Label Printer 0081 GK420t Label Printer 0084 GX420d Desktop Label Printer 008b HC100 wristbands Printer 008c ZP 450 Printer + 00a1 TLP2824 Plus 00d1 GC420d Label Printer 0110 ZD500 Desktop Label Printer 011c ZD410 Direct Thermal Label Printer @@ -13035,6 +13046,7 @@ 0010 MPMan MP-F40 MP3 Player 0a66 ClearCube Technology 0a67 Medeli Electronics Co., Ltd + ffff LCS Audio 0a68 Comaide Corp. 0a69 Chroma ate, Inc. 0a6b Green House Co., Ltd @@ -13246,10 +13258,11 @@ 0ac9 Micro Solutions, Inc. 
0000 Backpack CD-ReWriter 0001 BACKPACK 2 Cable - 0010 BACKPACK + 0010 BACKPACK CD Drive 0011 Backpack 40GB Hard Drive 0110 BACKPACK 0111 BackPack + 10ff BACKPACK 1234 BACKPACK 0aca OPEN Networks Ltd 1060 OPEN NT1 Plus II @@ -13308,7 +13321,7 @@ 3102 MemoryStick Card Reader 3201 MMC/SD+MemoryStick Card Reader 3216 HS Card Reader - 3260 7-in-1 Card Reader + 3260 ND3260 7-in-1 Card Reader 5010 ND5010 Card Reader 0af0 Option 5000 UMTS Card @@ -13382,6 +13395,7 @@ 17a0 Xonar U3 sound card 17a1 Eee Note EA800 (mass storage mode) 17ab USB-N13 802.11n Network Adapter (rev. B1) [Realtek RTL8192CU] + 17b5 Broadcom BCM20702A0 Bluetooth 17ba N10 Nano 802.11n Network Adapter [Realtek RTL8192CU] 17c2 ROG Spitfire 17c7 WL-330NUL diff --git a/share/mk/bsd.lib.mk b/share/mk/bsd.lib.mk index a1927181de14..18b6ad8b04e1 100644 --- a/share/mk/bsd.lib.mk +++ b/share/mk/bsd.lib.mk @@ -107,7 +107,7 @@ LDFLAGS+= -Wl,-zretpolineplt LDFLAGS.bfd+= -Wl,-znoexecstack .if ${MK_BRANCH_PROTECTION} != "no" CFLAGS+= -mbranch-protection=standard -.if ${MACHINE_ARCH} == "aarch64" && defined(BTI_REPORT_ERROR) +.if ${LINKER_FEATURES:Mbti-report} && defined(BTI_REPORT_ERROR) LDFLAGS+= -Wl,-zbti-report=error .endif .endif diff --git a/share/mk/bsd.linker.mk b/share/mk/bsd.linker.mk index 344de2aa6a40..f27755976761 100644 --- a/share/mk/bsd.linker.mk +++ b/share/mk/bsd.linker.mk @@ -11,9 +11,11 @@ # LINKER_FEATURES may contain one or more of the following, based on # linker support for that feature: # -# - build-id: support for generating a Build-ID note -# - retpoline: support for generating PLT with retpoline speculative -# execution vulnerability mitigation +# - build-id: support for generating a Build-ID note +# - retpoline: support for generating PLT with retpoline speculative +# execution vulnerability mitigation +# - bti-report: support for specifying how to report the missing +# Branch Target Identification (BTI) property (AArch64) # # LINKER_FREEBSD_VERSION is the linker's internal source 
version. # @@ -140,6 +142,9 @@ ${X_}LINKER_FEATURES+= ifunc-noplt # If we are using lld 10.0 or newer we can use -Wl,--gdb-index without crashing ${X_}LINKER_FEATURES+= gdb-index .endif +.if ${${X_}LINKER_TYPE} == "lld" && ${${X_}LINKER_VERSION} >= 140000 +${X_}LINKER_FEATURES+= bti-report +.endif .endif .else # Use LD's values diff --git a/share/mk/bsd.prog.mk b/share/mk/bsd.prog.mk index 237794ccf3d2..028e4bbfe882 100644 --- a/share/mk/bsd.prog.mk +++ b/share/mk/bsd.prog.mk @@ -79,7 +79,7 @@ LDFLAGS+= -Wl,-zretpolineplt LDFLAGS.bfd+= -Wl,-znoexecstack .if ${MK_BRANCH_PROTECTION} != "no" CFLAGS+= -mbranch-protection=standard -.if ${MACHINE_ARCH} == "aarch64" && defined(BTI_REPORT_ERROR) +.if ${LINKER_FEATURES:Mbti-report} && defined(BTI_REPORT_ERROR) LDFLAGS+= -Wl,-zbti-report=error .endif .endif diff --git a/stand/lua/drawer.lua b/stand/lua/drawer.lua index ab9f0038c470..2f1de27beb69 100644 --- a/stand/lua/drawer.lua +++ b/stand/lua/drawer.lua @@ -484,7 +484,7 @@ logodefs = { brand_position = {x = 2, y = 1} logo_position = {x = 46, y = 4} menu_position = {x = 5, y = 10} -frame_size = {w = 42, h = 13} +frame_size = {w = 42, h = 14} default_shift = {x = 0, y = 0} shift = default_shift diff --git a/stand/lua/menu.lua b/stand/lua/menu.lua index 0587e5ae6586..ed84360d93b0 100644 --- a/stand/lua/menu.lua +++ b/stand/lua/menu.lua @@ -535,7 +535,7 @@ end function menu.autoboot(delay) local x = loader.getenv("loader_menu_timeout_x") or 4 - local y = loader.getenv("loader_menu_timeout_y") or 23 + local y = loader.getenv("loader_menu_timeout_y") or 24 local endtime = loader.time() + delay local time local last diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index c509dcc2f7dd..025c3c365de5 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1487,6 +1487,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) finishidentcpu(); /* Final stage of CPU initialization */ + invlpgb_works = (amd_extended_feature_extensions & + 
AMDFEID_INVLPGB) != 0; + TUNABLE_INT_FETCH("vm.pmap.invlpgb_works", &invlpgb_works); + if (invlpgb_works) + invlpgb_maxcnt = cpu_procinfo3 & AMDID_INVLPGB_MAXCNT; + /* * Initialize the clock before the console so that console * initialization can use DELAY(). diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 91737637b714..12abb8b6bf8b 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -679,6 +679,20 @@ smp_targeted_tlb_shootdown_native(pmap_t pmap, vm_offset_t addr1, vm_offset_t ad void smp_masked_invltlb(pmap_t pmap, smp_invl_cb_t curcpu_cb) { + if (invlpgb_works && pmap == kernel_pmap) { + invlpgb(INVLPGB_GLOB, 0, 0); + + /* + * TLBSYNC syncs only against INVLPGB executed on the + * same CPU. Since current thread is pinned by + * caller, we do not need to enter critical section to + * prevent migration. + */ + tlbsync(); + sched_unpin(); + return; + } + smp_targeted_tlb_shootdown(pmap, 0, 0, curcpu_cb, invl_op_tlb); #ifdef COUNT_XINVLTLB_HITS ipi_global++; @@ -688,6 +702,13 @@ smp_masked_invltlb(pmap_t pmap, smp_invl_cb_t curcpu_cb) void smp_masked_invlpg(vm_offset_t addr, pmap_t pmap, smp_invl_cb_t curcpu_cb) { + if (invlpgb_works && pmap == kernel_pmap) { + invlpgb(INVLPGB_GLOB | INVLPGB_VA | trunc_page(addr), 0, 0); + tlbsync(); + sched_unpin(); + return; + } + smp_targeted_tlb_shootdown(pmap, addr, 0, curcpu_cb, invl_op_pg); #ifdef COUNT_XINVLTLB_HITS ipi_page++; @@ -698,6 +719,39 @@ void smp_masked_invlpg_range(vm_offset_t addr1, vm_offset_t addr2, pmap_t pmap, smp_invl_cb_t curcpu_cb) { + if (invlpgb_works && pmap == kernel_pmap) { + vm_offset_t va; + uint64_t cnt, total; + + addr1 = trunc_page(addr1); + addr2 = round_page(addr2); + total = atop(addr2 - addr1); + for (va = addr1; total > 0;) { + if ((va & PDRMASK) != 0 || total < NPDEPG) { + cnt = atop(NBPDR - (va & PDRMASK)); + if (cnt > total) + cnt = total; + if (cnt > invlpgb_maxcnt + 1) + cnt = invlpgb_maxcnt + 1; + invlpgb(INVLPGB_GLOB | 
INVLPGB_VA | va, 0, + cnt - 1); + va += ptoa(cnt); + total -= cnt; + } else { + cnt = total / NPTEPG; + if (cnt > invlpgb_maxcnt + 1) + cnt = invlpgb_maxcnt + 1; + invlpgb(INVLPGB_GLOB | INVLPGB_VA | va, 0, + INVLPGB_2M_CNT | (cnt - 1)); + va += cnt << PDRSHIFT; + total -= cnt * NPTEPG; + } + } + tlbsync(); + sched_unpin(); + return; + } + smp_targeted_tlb_shootdown(pmap, addr1, addr2, curcpu_cb, invl_op_pgrng); #ifdef COUNT_XINVLTLB_HITS diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 99dbac4aec3f..cd8727c6be7c 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -550,6 +550,10 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, int invpcid_works = 0; SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0, "Is the invpcid instruction available ?"); +int invlpgb_works; +SYSCTL_INT(_vm_pmap, OID_AUTO, invlpgb_works, CTLFLAG_RD, &invlpgb_works, 0, + "Is the invlpgb instruction available?"); +int invlpgb_maxcnt; int pmap_pcid_invlpg_workaround = 0; SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_invlpg_workaround, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index 62e782304fca..ca53d73b0186 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -525,6 +525,29 @@ invpcid(struct invpcid_descr *d, int type) : : "r" (d), "r" ((u_long)type) : "memory"); } +#define INVLPGB_VA 0x0001 +#define INVLPGB_PCID 0x0002 +#define INVLPGB_ASID 0x0004 +#define INVLPGB_GLOB 0x0008 +#define INVLPGB_FIN 0x0010 +#define INVLPGB_NEST 0x0020 + +#define INVLPGB_DESCR(asid, pcid) (((pcid) << 16) | (asid)) + +#define INVLPGB_2M_CNT (1u << 31) + +static __inline void +invlpgb(uint64_t rax, uint32_t edx, uint32_t ecx) +{ + __asm __volatile("invlpgb" : : "a" (rax), "d" (edx), "c" (ecx)); +} + +static __inline void +tlbsync(void) +{ + __asm __volatile("tlbsync"); +} + static __inline u_short rfs(void) { diff --git a/sys/amd64/include/pmap.h 
b/sys/amd64/include/pmap.h index 273693e1f782..0819b3bc2945 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -424,6 +424,8 @@ extern vm_offset_t virtual_end; extern vm_paddr_t dmaplimit; extern int pmap_pcid_enabled; extern int invpcid_works; +extern int invlpgb_works; +extern int invlpgb_maxcnt; extern int pmap_pcid_invlpg_workaround; extern int pmap_pcid_invlpg_workaround_uena; diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index 3451e91d9de1..9fcfdc7cb441 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -151,9 +151,13 @@ static int ppt_attach(device_t dev) { struct pptdev *ppt; + uint16_t cmd; ppt = device_get_softc(dev); + cmd = pci_read_config(dev, PCIR_COMMAND, 2); + cmd &= ~(PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); + pci_write_config(dev, PCIR_COMMAND, cmd, 2); iommu_remove_device(iommu_host_domain(), pci_get_rid(dev)); num_pptdevs++; TAILQ_INSERT_TAIL(&pptdev_list, ppt, next); @@ -176,7 +180,6 @@ ppt_detach(device_t dev) return (EBUSY); num_pptdevs--; TAILQ_REMOVE(&pptdev_list, ppt, next); - pci_disable_busmaster(dev); if (iommu_host_domain() != NULL) iommu_add_device(iommu_host_domain(), pci_get_rid(dev)); @@ -376,11 +379,28 @@ ppt_pci_reset(device_t dev) pci_power_reset(dev); } +static uint16_t +ppt_bar_enables(struct pptdev *ppt) +{ + struct pci_map *pm; + uint16_t cmd; + + cmd = 0; + for (pm = pci_first_bar(ppt->dev); pm != NULL; pm = pci_next_bar(pm)) { + if (PCI_BAR_IO(pm->pm_value)) + cmd |= PCIM_CMD_PORTEN; + if (PCI_BAR_MEM(pm->pm_value)) + cmd |= PCIM_CMD_MEMEN; + } + return (cmd); +} + int ppt_assign_device(struct vm *vm, int bus, int slot, int func) { struct pptdev *ppt; int error; + uint16_t cmd; /* Passing NULL requires the device to be unowned. 
*/ error = ppt_find(NULL, bus, slot, func, &ppt); @@ -392,6 +412,9 @@ ppt_assign_device(struct vm *vm, int bus, int slot, int func) pci_restore_state(ppt->dev); ppt->vm = vm; iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); + cmd = pci_read_config(ppt->dev, PCIR_COMMAND, 2); + cmd |= PCIM_CMD_BUSMASTEREN | ppt_bar_enables(ppt); + pci_write_config(ppt->dev, PCIR_COMMAND, cmd, 2); return (0); } @@ -400,11 +423,15 @@ ppt_unassign_device(struct vm *vm, int bus, int slot, int func) { struct pptdev *ppt; int error; + uint16_t cmd; error = ppt_find(vm, bus, slot, func, &ppt); if (error) return (error); + cmd = pci_read_config(ppt->dev, PCIR_COMMAND, 2); + cmd &= ~(PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); + pci_write_config(ppt->dev, PCIR_COMMAND, cmd, 2); pci_save_state(ppt->dev); ppt_pci_reset(ppt->dev); pci_restore_state(ppt->dev); diff --git a/sys/arm/nvidia/drm2/tegra_bo.c b/sys/arm/nvidia/drm2/tegra_bo.c index 08cd3de6a3fe..346118b78c2b 100644 --- a/sys/arm/nvidia/drm2/tegra_bo.c +++ b/sys/arm/nvidia/drm2/tegra_bo.c @@ -62,7 +62,7 @@ tegra_bo_destruct(struct tegra_bo *bo) for (i = 0; i < bo->npages; i++) { m = bo->m[i]; vm_page_busy_acquire(m, 0); - cdev_pager_free_page(bo->cdev_pager, m); + cdev_mgtdev_pager_free_page(bo->cdev_pager, m); m->flags &= ~PG_FICTITIOUS; vm_page_unwire_noq(m); vm_page_free(m); diff --git a/sys/arm64/arm64/efirt_support.S b/sys/arm64/arm64/efirt_support.S index df2daa120499..0975c0ece806 100644 --- a/sys/arm64/arm64/efirt_support.S +++ b/sys/arm64/arm64/efirt_support.S @@ -25,6 +25,8 @@ * SUCH DAMAGE. */ +#include + #include #include @@ -161,3 +163,5 @@ efi_rt_panic_str_ptr: .chericap efi_rt_panic_str .size efi_rt_panic_str_ptr, . 
- efi_rt_panic_str_ptr #endif + +GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL) diff --git a/sys/arm64/arm64/freebsd64cb_sigtramp.S b/sys/arm64/arm64/freebsd64cb_sigtramp.S index 55ee528c2afe..9dbf00e95d55 100644 --- a/sys/arm64/arm64/freebsd64cb_sigtramp.S +++ b/sys/arm64/arm64/freebsd64cb_sigtramp.S @@ -27,6 +27,7 @@ */ #include "assym.inc" +#include #include #include @@ -73,3 +74,5 @@ freebsd64cb_esigcode: .size freebsd64cb_szsigcode, 8 freebsd64cb_szsigcode: .quad freebsd64cb_esigcode - freebsd64cb_sigcode + +GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL) diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c index 362d8f54ecd1..d141f9c3392c 100644 --- a/sys/arm64/arm64/genassym.c +++ b/sys/arm64/arm64/genassym.c @@ -45,7 +45,6 @@ ASSYM(BP_MODULEP, offsetof(struct arm64_bootparams, modulep)); ASSYM(BP_KERN_STACK, offsetof(struct arm64_bootparams, kern_stack)); ASSYM(BP_KERN_TTBR0, offsetof(struct arm64_bootparams, kern_ttbr0)); ASSYM(BP_BOOT_EL, offsetof(struct arm64_bootparams, boot_el)); -ASSYM(BP_HCR_EL2, offsetof(struct arm64_bootparams, hcr_el2)); ASSYM(PCPU_SIZE, sizeof(struct pcpu)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S index c96c702ed2ea..feb6c7963b58 100644 --- a/sys/arm64/arm64/locore.S +++ b/sys/arm64/arm64/locore.S @@ -254,7 +254,7 @@ virtdone: /* Zero the BSS */ buildptr_range 15, 14, _bss_start, _end 1: - str xzr, [PTR(15)], #8 + stp xzr, xzr, [PTR(15)], #16 cmp PTR(15), PTR(14) b.lo 1b @@ -302,7 +302,6 @@ virtdone: str PTR(25), [PTR(0), #BP_KERN_STACK] str x27, [PTR(0), #BP_KERN_TTBR0] str x23, [PTR(0), #BP_BOOT_EL] - str x4, [PTR(0), #BP_HCR_EL2] #ifdef KASAN /* Save bootparams */ @@ -484,8 +483,12 @@ LEND(mpentry_common) #endif /* - * If we are started in EL2, configure the required hypervisor - * registers and drop to EL1. 
+ * Enter the exception level the kernel will use: + * + * - If in EL1 continue in EL1 + * - If the CPU supports FEAT_VHE then set HCR_E2H and HCR_TGE and continue + * in EL2 + * - Configure EL2 to support running the kernel at EL1 and exit to that */ #ifdef __CHERI_PURE_CAPABILITY__ .arch_extension noc64 @@ -527,13 +530,14 @@ LENTRY(enter_kernel_el) isb /* Configure the Hypervisor */ - ldr x2, =(HCR_RW | HCR_APK | HCR_API) + ldr x2, =(HCR_RW | HCR_APK | HCR_API | HCR_E2H) msr hcr_el2, x2 /* Stash value of HCR_EL2 for later */ isb mrs x4, hcr_el2 + /* Load the Virtualization Process ID Register */ mrs x2, midr_el1 msr vpidr_el2, x2 @@ -546,17 +550,25 @@ LENTRY(enter_kernel_el) ldr x2, =INIT_SCTLR_EL1 msr sctlr_el1, x2 + /* Check if the E2H flag is set */ + tst x4, #HCR_E2H + b.eq .Lno_vhe + /* - * On some hardware, e.g., Apple M1, we can't clear E2H, so make sure we - * don't trap to EL2 for SIMD register usage to have at least a - * minimally usable system. + * The kernel will be running in EL2, route exceptions here rather + * than EL1. */ - tst x4, #HCR_E2H - mov x3, #CPTR_RES1 /* HCR_E2H == 0 */ - mov x5, #CPTR_FPEN /* HCR_E2H == 1 */ - csel x2, x3, x5, eq - msr cptr_el2, x2 + orr x4, x4, #(HCR_TGE) + msr hcr_el2, x4 + isb + msr SCTLR_EL12_REG, x2 +#if __has_feature(capabilities) + ldr x2, =(CPTR_E2H_CEN) +#else + mov x2, xzr /* CPTR_EL2 is managed by vfp.c */ +#endif + msr cptr_el2, x2 #if __has_feature(capabilities) /* * Wait for the write to cptr_el2 to complete. It will enable the @@ -565,20 +577,22 @@ LENTRY(enter_kernel_el) * act as in place of this barrier. 
*/ isb + + /* Clear DDC_EL1 */ + msr ddc_el1, czr #endif - /* Don't trap to EL2 for CP15 traps */ - msr hstr_el2, xzr + ldr x3, =(CNTHCTL_E2H_EL1PCTEN | CNTHCTL_E2H_EL1PTEN) + ldr x5, =(PSR_DAIF | PSR_M_EL2h) + b .Ldone_vhe - /* Enable access to the physical timers at EL1 */ - tst x4, #HCR_E2H - ldr x3, =(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) - ldr x5, =(CNTHCTL_E2H_EL1PCTEN | CNTHCTL_E2H_EL1PTEN) - csel x2, x3, x5, eq - msr cnthctl_el2, x2 - - /* Set the counter offset to a known value */ - msr cntvoff_el2, xzr +.Lno_vhe: + ldr x2, =(CPTR_RES1) + msr cptr_el2, x2 +#if __has_feature(capabilities) + /* As noted above, wait for the write to cptr_el2 to complete. */ + isb +#endif /* Hypervisor trap functions */ adrp x2, hyp_stub_vectors @@ -590,12 +604,28 @@ LENTRY(enter_kernel_el) msr vbar_el2, x2 #endif + ldr x3, =(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) + ldr x5, =(PSR_DAIF | PSR_M_EL1h) + +#ifdef __CHERI_PURE_CAPABILITY__ + /* Clear DDC_EL2 */ + msr ddc, czr +#endif +.Ldone_vhe: + /* Enable access to the physical timers at EL1 */ + msr cnthctl_el2, x3 + /* Set the return PSTATE */ + msr spsr_el2, x5 + + /* Don't trap to EL2 for CP15 traps */ + msr hstr_el2, xzr + + /* Set the counter offset to a known value */ + msr cntvoff_el2, xzr + /* Zero vttbr_el2 so a hypervisor can tell the host and guest apart */ msr vttbr_el2, xzr - mov x2, #(PSR_DAIF | PSR_M_EL1h) - msr spsr_el2, x2 - /* Configure GICv3 CPU interface */ mrs x2, id_aa64pfr0_el1 /* Extract GIC bits from the register */ @@ -614,9 +644,6 @@ LENTRY(enter_kernel_el) mrs x2, cctlr_el2 orr x2, x2, #(CCTLR_EL2_C64E_MASK) msr cctlr_el2, x2 - - /* Clear DDC_EL2 */ - msr ddc, czr #endif /* Set the address to return to our return address */ #if __has_feature(capabilities) diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c index 6fb7b72071a1..1d7011755c23 100644 --- a/sys/arm64/arm64/machdep.c +++ b/sys/arm64/arm64/machdep.c @@ -154,7 +154,6 @@ static struct trapframe proc0_tf; int early_boot = 1; int cold = 
1; static int boot_el; -static uint64_t hcr_el2; struct kva_md_info kmi; @@ -230,12 +229,7 @@ pan_enable(void) bool has_hyp(void) { - - /* - * XXX The E2H check is wrong, but it's close enough for now. Needs to - * be re-evaluated once we're running regularly in EL2. - */ - return (boot_el == CURRENTEL_EL_EL2 && (hcr_el2 & HCR_E2H) == 0); + return (boot_el == CURRENTEL_EL_EL2); } bool @@ -939,7 +933,6 @@ initarm(struct arm64_bootparams *abp) TSRAW(&thread0, TS_ENTER, __func__, NULL); boot_el = abp->boot_el; - hcr_el2 = abp->hcr_el2; /* Parse loader or FDT boot parametes. Determine last used address. */ lastaddr = parse_boot_param(abp); diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h index 3b0269d0d8d6..925c67a4f752 100644 --- a/sys/arm64/include/armreg.h +++ b/sys/arm64/include/armreg.h @@ -241,6 +241,22 @@ #define CLIDR_CTYPE_ID 0x3 /* Split instruction and data */ #define CLIDR_CTYPE_UNIFIED 0x4 /* Unified */ +/* CNTKCTL_EL1 - Counter-timer Kernel Control Register */ +#define CNTKCTL_EL1 MRS_REG(CNTKCTL_EL0) +#define CNTKCTL_EL1_op0 3 +#define CNTKCTL_EL1_op1 0 +#define CNTKCTL_EL1_CRn 14 +#define CNTKCTL_EL1_CRm 1 +#define CNTKCTL_EL1_op2 0 + +/* CNTKCTL_EL12 - Counter-timer Kernel Control Register */ +#define CNTKCTL_EL12 MRS_REG(CNTKCTL_EL0) +#define CNTKCTL_EL12_op0 3 +#define CNTKCTL_EL12_op1 5 +#define CNTKCTL_EL12_CRn 14 +#define CNTKCTL_EL12_CRm 1 +#define CNTKCTL_EL12_op2 0 + /* CNTP_CTL_EL0 - Counter-timer Physical Timer Control register */ #define CNTP_CTL_EL0 MRS_REG(CNTP_CTL_EL0) #define CNTP_CTL_EL0_op0 3 @@ -284,6 +300,38 @@ #define CNTFRQ_EL0_CRm 0 #define CNTFRQ_EL0_op2 0 +/* CNTV_CTL_EL0 - Counter-timer Virtual Timer Control register */ +#define CNTV_CTL_EL0 MRS_REG(CNTV_CTL_EL0) +#define CNTV_CTL_EL0_op0 3 +#define CNTV_CTL_EL0_op1 3 +#define CNTV_CTL_EL0_CRn 14 +#define CNTV_CTL_EL0_CRm 3 +#define CNTV_CTL_EL0_op2 1 + +/* CNTV_CTL_EL02 - Counter-timer Virtual Timer Control register */ +#define CNTV_CTL_EL02 
MRS_REG(CNTV_CTL_EL02) +#define CNTV_CTL_EL02_op0 3 +#define CNTV_CTL_EL02_op1 5 +#define CNTV_CTL_EL02_CRn 14 +#define CNTV_CTL_EL02_CRm 3 +#define CNTV_CTL_EL02_op2 1 + +/* CNTV_CVAL_EL0 - Counter-timer Virtual Timer CompareValue register */ +#define CNTV_CVAL_EL0 MRS_REG(CNTV_CVAL_EL0) +#define CNTV_CVAL_EL0_op0 3 +#define CNTV_CVAL_EL0_op1 3 +#define CNTV_CVAL_EL0_CRn 14 +#define CNTV_CVAL_EL0_CRm 3 +#define CNTV_CVAL_EL0_op2 2 + +/* CNTV_CVAL_EL02 - Counter-timer Virtual Timer CompareValue register */ +#define CNTV_CVAL_EL02 MRS_REG(CNTV_CVAL_EL02) +#define CNTV_CVAL_EL02_op0 3 +#define CNTV_CVAL_EL02_op1 5 +#define CNTV_CVAL_EL02_CRn 14 +#define CNTV_CVAL_EL02_CRm 3 +#define CNTV_CVAL_EL02_op2 2 + /* CONTEXTIDR_EL1 - Context ID register */ #define CONTEXTIDR_EL1 MRS_REG(CONTEXTIDR_EL1) #define CONTEXTIDR_EL1_REG MRS_REG_ALT_NAME(CONTEXTIDR_EL1) @@ -346,6 +394,20 @@ * CCTLR_EL1/2 - Capability Control Register * The rest of the fields mirror CCTLR_EL0 */ +#define CCTLR_EL1_REG MRS_REG_ALT_NAME(CCTLR_EL1) +#define CCTLR_EL1_op0 3 +#define CCTLR_EL1_op1 0 +#define CCTLR_EL1_CRn 1 +#define CCTLR_EL1_CRm 2 +#define CCTLR_EL1_op2 2 + +#define CCTLR_EL12_REG MRS_REG_ALT_NAME(CCTLR_EL12) +#define CCTLR_EL12_op0 3 +#define CCTLR_EL12_op1 5 +#define CCTLR_EL12_CRn 1 +#define CCTLR_EL12_CRm 2 +#define CCTLR_EL12_op2 2 + #define CCTLR_EL1_C64E_MASK (0x1 << 5) /* Enable C64 mode upon exception */ #define CCTLR_EL1_TGEN1_MASK (0x1 << 1) /* Page table CLG bit for TTBR1 */ #define CCTLR_EL1_TGEN0_MASK (0x1 << 0) /* Page table CLG bit for TTBR0 */ diff --git a/sys/arm64/include/atomic.h b/sys/arm64/include/atomic.h index ff904e7eb1bb..dbe22f2c904c 100644 --- a/sys/arm64/include/atomic.h +++ b/sys/arm64/include/atomic.h @@ -66,8 +66,9 @@ extern _Bool lse_supported; #include #include -#ifdef _KERNEL - +#if defined(__ARM_FEATURE_ATOMICS) +#define _ATOMIC_LSE_SUPPORTED 1 +#elif defined(_KERNEL) #ifdef LSE_ATOMICS #define _ATOMIC_LSE_SUPPORTED 1 #else diff --git 
a/sys/arm64/include/hypervisor.h b/sys/arm64/include/hypervisor.h index 79145bf14fe0..cd00f1198708 100644 --- a/sys/arm64/include/hypervisor.h +++ b/sys/arm64/include/hypervisor.h @@ -41,6 +41,10 @@ #define CNTHCTL_EL1PCTEN (1 << 0) /* Allow physical counter access */ #define CNTHCTL_EL1PCEN (1 << 1) /* Allow physical timer access */ /* Valid if HCR_EL2.E2H == 1 */ +#define CNTHCTL_E2H_EL0PCTEN (1 << 0) /* Allow EL0 physical counter access */ +#define CNTHCTL_E2H_EL0VCTEN (1 << 1) /* Allow EL0 virtual counter access */ +#define CNTHCTL_E2H_EL0VTEN (1 << 8) +#define CNTHCTL_E2H_EL0PTEN (1 << 9) #define CNTHCTL_E2H_EL1PCTEN (1 << 10) /* Allow physical counter access */ #define CNTHCTL_E2H_EL1PTEN (1 << 11) /* Allow physical timer access */ /* Unconditionally valid */ @@ -49,19 +53,26 @@ /* CPTR_EL2 - Architecture feature trap register */ /* Valid if HCR_EL2.E2H == 0 */ -#define CPTR_RES0 0x7fefc800 +#define CPTR_TRAP_ALL 0xc01037ff /* Enable all traps */ +#define CPTR_RES0 0x7fefc800 #if __has_feature(capabilities) -#define CPTR_RES1 0x000031ff -#define CPTR_TC 0x00000200 /* Trap Capabilities */ +#define CPTR_RES1 0x000030ff +#define CPTR_TC 0x00000200 /* Trap Capabilities */ #else -#define CPTR_RES1 0x000033ff +#define CPTR_RES1 0x000032ff #endif -#define CPTR_TFP 0x00000400 +#define CPTR_TFP 0x00000400 +#define CPTR_TTA 0x00100000 /* Valid if HCR_EL2.E2H == 1 */ -#define CPTR_FPEN 0x00300000 +#define CPTR_E2H_TRAP_ALL 0xd0000000 +#define CPTR_E2H_ZPEN 0x00030000 +#if __has_feature(capabilities) +#define CPTR_E2H_CEN 0x000c0000 +#endif +#define CPTR_E2H_FPEN 0x00300000 +#define CPTR_E2H_TTA 0x10000000 /* Unconditionally valid */ -#define CPTR_TTA 0x00100000 -#define CPTR_TCPAC 0x80000000 +#define CPTR_TCPAC 0x80000000 /* HCR_EL2 - Hypervisor Config Register */ #define HCR_VM (UL(0x1) << 0) diff --git a/sys/arm64/include/machdep.h b/sys/arm64/include/machdep.h index 9708fcd7aae3..66819f45caf6 100644 --- a/sys/arm64/include/machdep.h +++ b/sys/arm64/include/machdep.h 
@@ -33,7 +33,6 @@ struct arm64_bootparams { vm_pointer_t modulep; vm_pointer_t kern_stack; vm_paddr_t kern_ttbr0; - uint64_t hcr_el2; int boot_el; /* EL the kernel booted from */ int pad; }; diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c index 40664fb1c51d..ba7104bdab6d 100644 --- a/sys/arm64/vmm/io/vgic_v3.c +++ b/sys/arm64/vmm/io/vgic_v3.c @@ -68,6 +68,7 @@ #include #include #include +#include #include "vgic.h" #include "vgic_v3.h" @@ -2223,7 +2224,7 @@ vgic_v3_init(device_t dev) uint64_t ich_vtr_el2; uint32_t pribits, prebits; - ich_vtr_el2 = vmm_call_hyp1(HYP_READ_REGISTER, HYP_REG_ICH_VTR); + ich_vtr_el2 = vmm_read_reg(HYP_REG_ICH_VTR); /* TODO: These fields are common with the vgicv2 driver */ pribits = ICH_VTR_EL2_PRIBITS(ich_vtr_el2); diff --git a/sys/arm64/vmm/io/vtimer.c b/sys/arm64/vmm/io/vtimer.c index 0bff21cb63ec..46b07b34f6c6 100644 --- a/sys/arm64/vmm/io/vtimer.c +++ b/sys/arm64/vmm/io/vtimer.c @@ -129,14 +129,42 @@ vtimer_vminit(struct hyp *hyp) { uint64_t now; + hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg; + /* * Configure the Counter-timer Hypervisor Control Register for the VM. 
- * - * CNTHCTL_EL1PCEN: trap access to CNTP_{CTL, CVAL, TVAL}_EL0 from EL1 - * CNTHCTL_EL1PCTEN: trap access to CNTPCT_EL0 */ - hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg & ~CNTHCTL_EL1PCEN; - hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_EL1PCTEN; + if (in_vhe()) { + /* + * CNTHCTL_E2H_EL0PCTEN: trap EL0 access to CNTP{CT,CTSS}_EL0 + * CNTHCTL_E2H_EL0VCTEN: don't trap EL0 access to + * CNTV{CT,CTSS}_EL0 + * CNTHCTL_E2H_EL0VTEN: don't trap EL0 access to + * CNTV_{CTL,CVAL,TVAL}_EL0 + * CNTHCTL_E2H_EL0PTEN: trap EL0 access to + * CNTP_{CTL,CVAL,TVAL}_EL0 + * CNTHCTL_E2H_EL1PTEN: trap EL1 access to + * CNTP_{CTL,CVAL,TVAL}_EL0 + * CNTHCTL_E2H_EL1PCTEN: trap access to CNTPCT_EL0 + * + * TODO: Don't trap when FEAT_ECV is present + */ + hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL0PCTEN; + hyp->vtimer.cnthctl_el2 |= CNTHCTL_E2H_EL0VCTEN; + hyp->vtimer.cnthctl_el2 |= CNTHCTL_E2H_EL0VTEN; + hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL0PTEN; + + hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL1PTEN; + hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_E2H_EL1PCTEN; + } else { + /* + * CNTHCTL_EL1PCEN: trap access to CNTP_{CTL, CVAL, TVAL}_EL0 + * from EL1 + * CNTHCTL_EL1PCTEN: trap access to CNTPCT_EL0 + */ + hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_EL1PCEN; + hyp->vtimer.cnthctl_el2 &= ~CNTHCTL_EL1PCTEN; + } now = READ_SPECIALREG(cntpct_el0); hyp->vtimer.cntvoff_el2 = now; diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c index a66d5ed1ba97..60fdeb5c6d6d 100644 --- a/sys/arm64/vmm/vmm_arm64.c +++ b/sys/arm64/vmm/vmm_arm64.c @@ -67,6 +67,7 @@ #include "io/vgic.h" #include "io/vgic_v3.h" #include "io/vtimer.h" +#include "vmm_handlers.h" #include "vmm_stat.h" #define HANDLED 1 @@ -103,9 +104,6 @@ static vm_pointer_t stack_hyp_va[MAXCPU]; static vmem_t *el2_mem_alloc; static void arm_setup_vectors(void *arg); -static void vmm_pmap_clean_stage2_tlbi(void); -static void vmm_pmap_invalidate_range(uint64_t, vm_offset_t, vm_offset_t, bool); -static void vmm_pmap_invalidate_all(uint64_t); 
DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); @@ -133,33 +131,6 @@ arm_setup_vectors(void *arg) el2_regs = arg; arm64_set_active_vcpu(NULL); - daif = intr_disable(); - - /* - * Install the temporary vectors which will be responsible for - * initializing the VMM when we next trap into EL2. - * - * x0: the exception vector table responsible for hypervisor - * initialization on the next call. - */ -#if __has_feature(capabilities) -#ifdef __CHERI_PURE_CAPABILITY__ - codep = (uintcap_t)cheri_setaddress(kernel_root_cap, - vtophys(&vmm_hyp_code)); - codep = cheri_andperm(codep, cheri_getperm(cheri_getpcc())); -#else - codep = (uintcap_t)cheri_setaddress(cheri_getpcc(), - vtophys(&vmm_hyp_code)); -#endif - codep = (uintcap_t)cheri_setbounds(codep, hyp_code_len); -#else - codep = vtophys(&vmm_hyp_code); -#endif - vmm_call_hyp_init(codep); - - /* Create and map the hypervisor stack */ - stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE; - /* * Configure the system control register for EL2: * @@ -177,9 +148,40 @@ arm_setup_vectors(void *arg) sctlr_el2 |= SCTLR_EL2_WXN; sctlr_el2 &= ~SCTLR_EL2_EE; - /* Special call to initialize EL2 */ - vmm_call_hyp4(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2, - sctlr_el2, el2_regs->vtcr_el2); + daif = intr_disable(); + + if (in_vhe()) { + WRITE_SPECIALREG(vtcr_el2, el2_regs->vtcr_el2); + } else { + /* + * Install the temporary vectors which will be responsible for + * initializing the VMM when we next trap into EL2. + * + * x0: the exception vector table responsible for hypervisor + * initialization on the next call. 
+ */ +#if __has_feature(capabilities) +#ifdef __CHERI_PURE_CAPABILITY__ + codep = (uintcap_t)cheri_setaddress(kernel_root_cap, + vtophys(&vmm_hyp_code)); + codep = cheri_andperm(codep, cheri_getperm(cheri_getpcc())); +#else + codep = (uintcap_t)cheri_setaddress(cheri_getpcc(), + vtophys(&vmm_hyp_code)); +#endif + codep = (uintcap_t)cheri_setbounds(codep, hyp_code_len); +#else + codep = vtophys(&vmm_hyp_code); +#endif + vmm_call_hyp_init(codep); + + /* Create and map the hypervisor stack */ + stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE; + + /* Special call to initialize EL2 */ + vmm_call_hyp4(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2, + sctlr_el2, el2_regs->vtcr_el2); + } intr_restore(daif); } @@ -293,7 +295,6 @@ vmmops_modinit(int ipinum) vm_paddr_t vmm_base; uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field; uint64_t cnthctl_el2; - register_t daif; int cpu, i; bool rv __diagused; @@ -336,82 +337,86 @@ vmmops_modinit(int ipinum) } pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT; - /* Initialise the EL2 MMU */ - if (!vmmpmap_init()) { - printf("vmm: Failed to init the EL2 MMU\n"); - return (ENOMEM); + if (!in_vhe()) { + /* Initialise the EL2 MMU */ + if (!vmmpmap_init()) { + printf("vmm: Failed to init the EL2 MMU\n"); + return (ENOMEM); + } } /* Set up the stage 2 pmap callbacks */ MPASS(pmap_clean_stage2_tlbi == NULL); - pmap_clean_stage2_tlbi = vmm_pmap_clean_stage2_tlbi; - pmap_stage2_invalidate_range = vmm_pmap_invalidate_range; - pmap_stage2_invalidate_all = vmm_pmap_invalidate_all; + pmap_clean_stage2_tlbi = vmm_clean_s2_tlbi; + pmap_stage2_invalidate_range = vmm_s2_tlbi_range; + pmap_stage2_invalidate_all = vmm_s2_tlbi_all; - /* - * Create an allocator for the virtual address space used by EL2. - * EL2 code is identity-mapped; the allocator is used to find space for - * VM structures. 
- */ - el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, M_WAITOK, - VMEM_CAPABILITY_ARENA); + if (!in_vhe()) { + /* + * Create an allocator for the virtual address space used by + * EL2. EL2 code is identity-mapped; the allocator is used to + * find space for VM structures. + */ + el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, + M_WAITOK, VMEM_CAPABILITY_ARENA); - /* Create the mappings for the hypervisor translation table. */ - hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code); + /* Create the mappings for the hypervisor translation table. */ + hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code); - /* We need an physical identity mapping for when we activate the MMU */ - hyp_code_base = vmm_base = vtophys(&vmm_hyp_code); - rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base, - VM_PROT_READ | VM_PROT_READ_CAP | VM_PROT_EXECUTE); - MPASS(rv); + /* We need an physical identity mapping for when we activate the MMU */ + hyp_code_base = vmm_base = vtophys(&vmm_hyp_code); + rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base, + VM_PROT_READ | VM_PROT_READ_CAP | VM_PROT_EXECUTE); + MPASS(rv); - next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE); + next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE); - /* Create a per-CPU hypervisor stack */ - CPU_FOREACH(cpu) { - vm_pointer_t stack_base; + /* Create a per-CPU hypervisor stack */ + CPU_FOREACH(cpu) { + vm_pointer_t stack_base; - stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO); + stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO); #ifdef __CHERI_PURE_CAPABILITY__ - stack_base = (vm_pointer_t)cheri_setaddress(vmm_el2_root_cap, - next_hyp_va); - stack_base = (vm_pointer_t)cheri_setboundsexact(stack_base, - VMM_STACK_SIZE); + stack_base = + (vm_pointer_t)cheri_setaddress(vmm_el2_root_cap, + next_hyp_va); + stack_base = cheri_setboundsexact(stack_base, + VMM_STACK_SIZE); #else - stack_base = next_hyp_va; + stack_base = next_hyp_va; #endif - - 
stack_hyp_va[cpu] = stack_base; - for (i = 0; i < VMM_STACK_PAGES; i++) { - rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i), - PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)), - VM_PROT_READ | VM_PROT_READ_CAP | VM_PROT_WRITE | - VM_PROT_WRITE_CAP); - MPASS(rv); + stack_hyp_va[cpu] = stack_base; + for (i = 0; i < VMM_STACK_PAGES; i++) { + rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i), + PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)), + VM_PROT_READ | VM_PROT_READ_CAP | + VM_PROT_WRITE | VM_PROT_WRITE_CAP); + MPASS(rv); + } + next_hyp_va += L2_SIZE; } - next_hyp_va += L2_SIZE; - } - el2_regs.tcr_el2 = TCR_EL2_RES1; - el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT, - TCR_EL2_PS_52BITS); - el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS); - el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA; + el2_regs.tcr_el2 = TCR_EL2_RES1; + el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT, + TCR_EL2_PS_52BITS); + el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS); + el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA; #if PAGE_SIZE == PAGE_SIZE_4K - el2_regs.tcr_el2 |= TCR_EL2_TG0_4K; + el2_regs.tcr_el2 |= TCR_EL2_TG0_4K; #elif PAGE_SIZE == PAGE_SIZE_16K - el2_regs.tcr_el2 |= TCR_EL2_TG0_16K; + el2_regs.tcr_el2 |= TCR_EL2_TG0_16K; #else #error Unsupported page size #endif #ifdef SMP - el2_regs.tcr_el2 |= TCR_EL2_SH0_IS; + el2_regs.tcr_el2 |= TCR_EL2_SH0_IS; #endif #if __has_feature(capabilities) - el2_regs.tcr_el2 |= TCR_EL2_HWU | TCR_EL2_HPD; + el2_regs.tcr_el2 |= TCR_EL2_HWU | TCR_EL2_HPD; #endif + } - switch (el2_regs.tcr_el2 & TCR_EL2_PS_MASK) { + switch (pa_range_bits << TCR_EL2_PS_SHIFT) { case TCR_EL2_PS_32BITS: vmm_max_ipa_bits = 32; break; @@ -473,37 +478,37 @@ vmmops_modinit(int ipinum) smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs); - /* Add memory to the vmem allocator (checking there is space) */ - if (vmm_base > (L2_SIZE + PAGE_SIZE)) { - /* - * Ensure there is an L2 block before the vmm code to check - * for buffer overflows 
on earlier data. Include the PAGE_SIZE - * of the minimum we can allocate. - */ - vmm_base -= L2_SIZE + PAGE_SIZE; - vmm_base = rounddown2(vmm_base, L2_SIZE); + if (!in_vhe()) { + /* Add memory to the vmem allocator (checking there is space) */ + if (vmm_base > (L2_SIZE + PAGE_SIZE)) { + /* + * Ensure there is an L2 block before the vmm code to check + * for buffer overflows on earlier data. Include the PAGE_SIZE + * of the minimum we can allocate. + */ + vmm_base -= L2_SIZE + PAGE_SIZE; + vmm_base = rounddown2(vmm_base, L2_SIZE); + + /* + * Check there is memory before the vmm code to add. + * + * Reserve the L2 block at address 0 so NULL dereference will + * raise an exception. + */ + if (vmm_base > L2_SIZE) + el2_vmem_add(L2_SIZE, vmm_base - L2_SIZE); + } /* - * Check there is memory before the vmm code to add. - * - * Reserve the L2 block at address 0 so NULL dereference will - * raise an exception. + * Add the memory after the stacks. There is most of an L2 block + * between the last stack and the first allocation so this should + * be safe without adding more padding. */ - if (vmm_base > L2_SIZE) - el2_vmem_add(L2_SIZE, vmm_base - L2_SIZE); + if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE) + el2_vmem_add(next_hyp_va, + HYP_VM_MAX_ADDRESS - next_hyp_va); } - - /* - * Add the memory after the stacks. There is most of an L2 block - * between the last stack and the first allocation so this should - * be safe without adding more padding. 
- */ - if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE) - el2_vmem_add(next_hyp_va, HYP_VM_MAX_ADDRESS - next_hyp_va); - - daif = intr_disable(); - cnthctl_el2 = vmm_call_hyp1(HYP_READ_REGISTER, HYP_REG_CNTHCTL); - intr_restore(daif); + cnthctl_el2 = vmm_read_reg(HYP_REG_CNTHCTL); vgic_init(); vtimer_init(cnthctl_el2); @@ -516,21 +521,25 @@ vmmops_modcleanup(void) { int cpu; - smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL); + if (!in_vhe()) { + smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL); - CPU_FOREACH(cpu) { - vmmpmap_remove(stack_hyp_va[cpu], VMM_STACK_PAGES * PAGE_SIZE, - false); - } + CPU_FOREACH(cpu) { + vmmpmap_remove(stack_hyp_va[cpu], + VMM_STACK_PAGES * PAGE_SIZE, false); + } - vmmpmap_remove(hyp_code_base, hyp_code_len, false); + vmmpmap_remove(hyp_code_base, hyp_code_len, false); + } vtimer_cleanup(); - vmmpmap_fini(); + if (!in_vhe()) { + vmmpmap_fini(); - CPU_FOREACH(cpu) - free(stack[cpu], M_HYP); + CPU_FOREACH(cpu) + free(stack[cpu], M_HYP); + } pmap_clean_stage2_tlbi = NULL; pmap_stage2_invalidate_range = NULL; @@ -582,9 +591,10 @@ vmmops_init(struct vm *vm, pmap_t pmap) vtimer_vminit(hyp); vgic_vminit(hyp); - hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size, - VM_PROT_READ | VM_PROT_WRITE | - VM_PROT_READ_CAP | VM_PROT_WRITE_CAP); + if (!in_vhe()) + hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size, + VM_PROT_READ | VM_PROT_WRITE | + VM_PROT_READ_CAP | VM_PROT_WRITE_CAP); return (hyp); } @@ -612,9 +622,10 @@ vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) vtimer_cpuinit(hypctx); vgic_cpuinit(hypctx); - hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size, - VM_PROT_READ | VM_PROT_WRITE | - VM_PROT_READ_CAP | VM_PROT_WRITE_CAP); + if (!in_vhe()) + hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size, + VM_PROT_READ | VM_PROT_WRITE | + VM_PROT_READ_CAP | VM_PROT_WRITE_CAP); return (hypctx); } @@ -651,26 +662,6 @@ vmmops_vmspace_free(struct vmspace *vmspace) vmspace_free(vmspace); } -static void 
-vmm_pmap_clean_stage2_tlbi(void) -{ - vmm_call_hyp0(HYP_CLEAN_S2_TLBI); -} - -static void -vmm_pmap_invalidate_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva, - bool final_only) -{ - MPASS(eva > sva); - vmm_call_hyp4(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only); -} - -static void -vmm_pmap_invalidate_all(uint64_t vttbr) -{ - vmm_call_hyp1(HYP_S2_TLBI_ALL, vttbr); -} - static inline void arm64_print_hyp_regs(struct vm_exit *vme) { @@ -1228,16 +1219,14 @@ vmmops_run(void *vcpui, uintcap_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) arm64_set_active_vcpu(hypctx); vgic_flush_hwstate(hypctx); - excp_type = vmm_call_hyp2(HYP_ENTER_GUEST, - hyp->el2_addr, hypctx->el2_addr); + /* Call into EL2 to switch to the guest */ + excp_type = vmm_enter_guest(hyp, hypctx); vgic_sync_hwstate(hypctx); vtimer_sync_hwstate(hypctx); /* - * Deactivate the stage2 pmap. vmm_pmap_clean_stage2_tlbi - * depends on this meaning we activate the VM before entering - * the vm again + * Deactivate the stage2 pmap. 
*/ PCPU_SET(curvmpmap, NULL); intr_restore(daif); @@ -1290,7 +1279,8 @@ vmmops_vcpu_cleanup(void *vcpui) vtimer_cpucleanup(hypctx); vgic_cpucleanup(hypctx); - vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true); + if (!in_vhe()) + vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true); free(hypctx, M_HYP); } @@ -1305,7 +1295,8 @@ vmmops_cleanup(void *vmi) smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp); - vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true); + if (!in_vhe()) + vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true); free(hyp, M_HYP); } diff --git a/sys/arm64/vmm/vmm_handlers.c b/sys/arm64/vmm/vmm_handlers.c new file mode 100644 index 000000000000..848b4afedc05 --- /dev/null +++ b/sys/arm64/vmm/vmm_handlers.c @@ -0,0 +1,114 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include + +#include + +#include "arm64.h" +#include "vmm_handlers.h" + +/* Read an EL2 register */ +static uint64_t +vmm_nvhe_read_reg(uint64_t reg) +{ + return (vmm_call_hyp1(HYP_READ_REGISTER, reg)); +} + +DEFINE_IFUNC(, uint64_t, vmm_read_reg, (uint64_t reg)) +{ + if (in_vhe()) + return (vmm_vhe_read_reg); + return (vmm_nvhe_read_reg); +} + +/* Enter the guest */ +static uint64_t +vmm_nvhe_enter_guest(struct hyp *hyp, struct hypctx *hypctx) +{ + return (vmm_call_hyp2(HYP_ENTER_GUEST, hyp->el2_addr, + hypctx->el2_addr)); +} + +DEFINE_IFUNC(, uint64_t, vmm_enter_guest, + (struct hyp *hyp, struct hypctx *hypctx)) +{ + if (in_vhe()) + return (vmm_vhe_enter_guest); + return (vmm_nvhe_enter_guest); +} + +/* Clean the TLB for all guests */ +static void +vmm_nvhe_clean_s2_tlbi(void) +{ + vmm_call_hyp0(HYP_CLEAN_S2_TLBI); +} + +DEFINE_IFUNC(, void, vmm_clean_s2_tlbi, (void)) +{ + if (in_vhe()) + return (vmm_vhe_clean_s2_tlbi); + return (vmm_nvhe_clean_s2_tlbi); +} + +/* + * Switch to a guest vttbr and clean the TLB for a range of guest + * virtual address space. 
+ */ +static void +vmm_nvhe_s2_tlbi_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva, + bool final_only) +{ + vmm_call_hyp4(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only); +} + +DEFINE_IFUNC(, void, vmm_s2_tlbi_range, + (uint64_t vttbr, vm_offset_t sva, vm_offset_t eva, bool final_only)) +{ + if (in_vhe()) + return (vmm_vhe_s2_tlbi_range); + return (vmm_nvhe_s2_tlbi_range); +} + +/* + * Switch to a guest vttbr and clean the TLB for all the guest + * virtual address space. + */ +static void +vmm_nvhe_s2_tlbi_all(uint64_t vttbr) +{ + vmm_call_hyp1(HYP_S2_TLBI_ALL, vttbr); +} + +DEFINE_IFUNC(, void, vmm_s2_tlbi_all, (uint64_t vttbr)) +{ + if (in_vhe()) + return (vmm_vhe_s2_tlbi_all); + return (vmm_nvhe_s2_tlbi_all); +} diff --git a/sys/arm64/vmm/vmm_handlers.h b/sys/arm64/vmm/vmm_handlers.h new file mode 100644 index 000000000000..f651fce6f32d --- /dev/null +++ b/sys/arm64/vmm/vmm_handlers.h @@ -0,0 +1,48 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_VMM_HANDLERS_H_ +#define _VMM_VMM_HANDLERS_H_ + +#include + +struct hyp; +struct hypctx; + +void vmm_clean_s2_tlbi(void); +uint64_t vmm_enter_guest(struct hyp *, struct hypctx *); +uint64_t vmm_read_reg(uint64_t); +void vmm_s2_tlbi_range(uint64_t, vm_offset_t, vm_offset_t, bool); +void vmm_s2_tlbi_all(uint64_t); + +void vmm_vhe_clean_s2_tlbi(void); +uint64_t vmm_vhe_enter_guest(struct hyp *, struct hypctx *); +uint64_t vmm_vhe_read_reg(uint64_t); +void vmm_vhe_s2_tlbi_range(uint64_t, vm_offset_t, vm_offset_t, bool); +void vmm_vhe_s2_tlbi_all(uint64_t); + +#endif /* _VMM_VMM_HANDLERS_H_ */ diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c index 2ddfba73b45b..f9c7cb0bbd7f 100644 --- a/sys/arm64/vmm/vmm_hyp.c +++ b/sys/arm64/vmm/vmm_hyp.c @@ -40,9 +40,7 @@ struct hypctx; -uint64_t vmm_hyp_enter(uint64_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t); -uint64_t vmm_enter_guest(struct hypctx *); +uint64_t VMM_HYP_FUNC(do_call_guest)(struct hypctx *); static void vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest) @@ -52,11 +50,12 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest) /* Store the guest VFP registers */ if (guest) { /* Store the timer registers */ - hypctx->vtimer_cpu.cntkctl_el1 = READ_SPECIALREG(cntkctl_el1); + hypctx->vtimer_cpu.cntkctl_el1 = + READ_SPECIALREG(EL1_REG(CNTKCTL)); hypctx->vtimer_cpu.virt_timer.cntx_cval_el0 = - 
READ_SPECIALREG(cntv_cval_el0); + READ_SPECIALREG(EL0_REG(CNTV_CVAL)); hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0 = - READ_SPECIALREG(cntv_ctl_el0); + READ_SPECIALREG(EL0_REG(CNTV_CTL)); /* Store the GICv3 registers */ hypctx->vgic_v3_regs.ich_eisr_el2 = @@ -228,53 +227,60 @@ vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest) hypctx->tf.tf_spsr = READ_SPECIALREG(spsr_el2); if (guest) { hypctx->tf.tf_esr = READ_SPECIALREG(esr_el2); + hypctx->par_el1 = READ_SPECIALREG(par_el1); } /* Store the guest special registers */ #if __has_feature(capabilities) hypctx->cctlr_el0 = READ_SPECIALREG(cctlr_el0); - hypctx->cctlr_el1 = READ_SPECIALREG(cctlr_el1); hypctx->cid_el0 = READ_SPECIALREG_CAP(cid_el0); hypctx->ddc_el0 = READ_SPECIALREG_CAP(ddc_el0); hypctx->rcsp_el0 = READ_SPECIALREG_CAP(rcsp_el0); hypctx->rddc_el0 = READ_SPECIALREG_CAP(rddc_el0); hypctx->rctpidr_el0 = READ_SPECIALREG_CAP(rctpidr_el0); - hypctx->elr_el1 = READ_SPECIALREG_CAP(celr_el1); hypctx->sp_el0 = READ_SPECIALREG_CAP(csp_el0); hypctx->tpidr_el0 = READ_SPECIALREG_CAP(ctpidr_el0); hypctx->tpidrro_el0 = READ_SPECIALREG_CAP(ctpidrro_el0); hypctx->tpidr_el1 = READ_SPECIALREG_CAP(ctpidr_el1); - hypctx->vbar_el1 = READ_SPECIALREG_CAP(cvbar_el1); #else - hypctx->elr_el1 = READ_SPECIALREG(elr_el1); hypctx->sp_el0 = READ_SPECIALREG(sp_el0); hypctx->tpidr_el0 = READ_SPECIALREG(tpidr_el0); hypctx->tpidrro_el0 = READ_SPECIALREG(tpidrro_el0); hypctx->tpidr_el1 = READ_SPECIALREG(tpidr_el1); - hypctx->vbar_el1 = READ_SPECIALREG(vbar_el1); #endif hypctx->actlr_el1 = READ_SPECIALREG(actlr_el1); - hypctx->afsr0_el1 = READ_SPECIALREG(afsr0_el1); - hypctx->afsr1_el1 = READ_SPECIALREG(afsr1_el1); - hypctx->amair_el1 = READ_SPECIALREG(amair_el1); - hypctx->contextidr_el1 = READ_SPECIALREG(contextidr_el1); - hypctx->cpacr_el1 = READ_SPECIALREG(cpacr_el1); hypctx->csselr_el1 = READ_SPECIALREG(csselr_el1); - hypctx->esr_el1 = READ_SPECIALREG(esr_el1); - hypctx->far_el1 = READ_SPECIALREG(far_el1); - 
hypctx->mair_el1 = READ_SPECIALREG(mair_el1); hypctx->mdccint_el1 = READ_SPECIALREG(mdccint_el1); hypctx->mdscr_el1 = READ_SPECIALREG(mdscr_el1); - hypctx->par_el1 = READ_SPECIALREG(par_el1); - hypctx->sctlr_el1 = READ_SPECIALREG(sctlr_el1); - hypctx->spsr_el1 = READ_SPECIALREG(spsr_el1); - hypctx->tcr_el1 = READ_SPECIALREG(tcr_el1); - /* TODO: Support when this is not res0 */ - hypctx->tcr2_el1 = 0; - hypctx->ttbr0_el1 = READ_SPECIALREG(ttbr0_el1); - hypctx->ttbr1_el1 = READ_SPECIALREG(ttbr1_el1); + + if (guest_or_nonvhe(guest)) { +#if __has_feature(capabilities) + hypctx->elr_el1 = READ_SPECIALREG_CAP(EL1_REG(ELR)); + hypctx->cctlr_el1 = READ_SPECIALREG(EL1_REG(CCTLR)); + hypctx->vbar_el1 = READ_SPECIALREG_CAP(EL1_REG(VBAR)); +#else + hypctx->elr_el1 = READ_SPECIALREG(EL1_REG(ELR)); + hypctx->vbar_el1 = READ_SPECIALREG(EL1_REG(VBAR)); +#endif + + hypctx->afsr0_el1 = READ_SPECIALREG(EL1_REG(AFSR0)); + hypctx->afsr1_el1 = READ_SPECIALREG(EL1_REG(AFSR1)); + hypctx->amair_el1 = READ_SPECIALREG(EL1_REG(AMAIR)); + hypctx->contextidr_el1 = READ_SPECIALREG(EL1_REG(CONTEXTIDR)); + hypctx->cpacr_el1 = READ_SPECIALREG(EL1_REG(CPACR)); + hypctx->esr_el1 = READ_SPECIALREG(EL1_REG(ESR)); + hypctx->far_el1 = READ_SPECIALREG(EL1_REG(FAR)); + hypctx->mair_el1 = READ_SPECIALREG(EL1_REG(MAIR)); + hypctx->sctlr_el1 = READ_SPECIALREG(EL1_REG(SCTLR)); + hypctx->spsr_el1 = READ_SPECIALREG(EL1_REG(SPSR)); + hypctx->tcr_el1 = READ_SPECIALREG(EL1_REG(TCR)); + /* TODO: Support when this is not res0 */ + hypctx->tcr2_el1 = 0; + hypctx->ttbr0_el1 = READ_SPECIALREG(EL1_REG(TTBR0)); + hypctx->ttbr1_el1 = READ_SPECIALREG(EL1_REG(TTBR1)); + } hypctx->cptr_el2 = READ_SPECIALREG(cptr_el2); hypctx->hcr_el2 = READ_SPECIALREG(hcr_el2); @@ -288,52 +294,65 @@ vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest) uint64_t dfr0; /* Restore the special registers */ + WRITE_SPECIALREG(hcr_el2, hypctx->hcr_el2); + isb(); + #if __has_feature(capabilities) WRITE_SPECIALREG(cctlr_el0, 
hypctx->cctlr_el0); - WRITE_SPECIALREG(cctlr_el1, hypctx->cctlr_el1); WRITE_SPECIALREG_CAP(cid_el0, hypctx->cid_el0); WRITE_SPECIALREG_CAP(ddc_el0, hypctx->ddc_el0); WRITE_SPECIALREG_CAP(rcsp_el0, hypctx->rcsp_el0); WRITE_SPECIALREG_CAP(rddc_el0, hypctx->rddc_el0); WRITE_SPECIALREG_CAP(rctpidr_el0, hypctx->rctpidr_el0); - WRITE_SPECIALREG_CAP(celr_el1, hypctx->elr_el1); WRITE_SPECIALREG_CAP(csp_el0, hypctx->sp_el0); WRITE_SPECIALREG_CAP(ctpidr_el0, hypctx->tpidr_el0); WRITE_SPECIALREG_CAP(ctpidrro_el0, hypctx->tpidrro_el0); WRITE_SPECIALREG_CAP(ctpidr_el1, hypctx->tpidr_el1); - WRITE_SPECIALREG_CAP(cvbar_el1, hypctx->vbar_el1); #else - WRITE_SPECIALREG(elr_el1, hypctx->elr_el1); WRITE_SPECIALREG(sp_el0, hypctx->sp_el0); WRITE_SPECIALREG(tpidr_el0, hypctx->tpidr_el0); WRITE_SPECIALREG(tpidrro_el0, hypctx->tpidrro_el0); WRITE_SPECIALREG(tpidr_el1, hypctx->tpidr_el1); - WRITE_SPECIALREG(vbar_el1, hypctx->vbar_el1); #endif WRITE_SPECIALREG(actlr_el1, hypctx->actlr_el1); - WRITE_SPECIALREG(afsr0_el1, hypctx->afsr0_el1); - WRITE_SPECIALREG(afsr1_el1, hypctx->afsr1_el1); - WRITE_SPECIALREG(amair_el1, hypctx->amair_el1); - WRITE_SPECIALREG(contextidr_el1, hypctx->contextidr_el1); - WRITE_SPECIALREG(cpacr_el1, hypctx->cpacr_el1); WRITE_SPECIALREG(csselr_el1, hypctx->csselr_el1); - WRITE_SPECIALREG(esr_el1, hypctx->esr_el1); - WRITE_SPECIALREG(far_el1, hypctx->far_el1); WRITE_SPECIALREG(mdccint_el1, hypctx->mdccint_el1); WRITE_SPECIALREG(mdscr_el1, hypctx->mdscr_el1); - WRITE_SPECIALREG(mair_el1, hypctx->mair_el1); - WRITE_SPECIALREG(par_el1, hypctx->par_el1); - WRITE_SPECIALREG(sctlr_el1, hypctx->sctlr_el1); - WRITE_SPECIALREG(tcr_el1, hypctx->tcr_el1); - /* TODO: tcr2_el1 */ - WRITE_SPECIALREG(ttbr0_el1, hypctx->ttbr0_el1); - WRITE_SPECIALREG(ttbr1_el1, hypctx->ttbr1_el1); - WRITE_SPECIALREG(spsr_el1, hypctx->spsr_el1); + + if (guest_or_nonvhe(guest)) { +#if __has_feature(capabilities) + WRITE_SPECIALREG_CAP(EL1_REG(ELR), hypctx->elr_el1); + 
WRITE_SPECIALREG(EL1_REG(CCTLR), hypctx->cctlr_el1); + WRITE_SPECIALREG_CAP(EL1_REG(VBAR), hypctx->vbar_el1); +#else + WRITE_SPECIALREG(EL1_REG(ELR), hypctx->elr_el1); + WRITE_SPECIALREG(EL1_REG(VBAR), hypctx->vbar_el1); +#endif + + WRITE_SPECIALREG(EL1_REG(AFSR0), hypctx->afsr0_el1); + WRITE_SPECIALREG(EL1_REG(AFSR1), hypctx->afsr1_el1); + WRITE_SPECIALREG(EL1_REG(AMAIR), hypctx->amair_el1); + WRITE_SPECIALREG(EL1_REG(CONTEXTIDR), hypctx->contextidr_el1); + WRITE_SPECIALREG(EL1_REG(CPACR), hypctx->cpacr_el1); + WRITE_SPECIALREG(EL1_REG(ESR), hypctx->esr_el1); + WRITE_SPECIALREG(EL1_REG(FAR), hypctx->far_el1); + WRITE_SPECIALREG(EL1_REG(MAIR), hypctx->mair_el1); // + + WRITE_SPECIALREG(EL1_REG(SCTLR), hypctx->sctlr_el1); + WRITE_SPECIALREG(EL1_REG(SPSR), hypctx->spsr_el1); + WRITE_SPECIALREG(EL1_REG(TCR), hypctx->tcr_el1); + /* TODO: tcr2_el1 */ + WRITE_SPECIALREG(EL1_REG(TTBR0), hypctx->ttbr0_el1); + WRITE_SPECIALREG(EL1_REG(TTBR1), hypctx->ttbr1_el1); + } + + if (guest) { + WRITE_SPECIALREG(par_el1, hypctx->par_el1); + } WRITE_SPECIALREG(cptr_el2, hypctx->cptr_el2); - WRITE_SPECIALREG(hcr_el2, hypctx->hcr_el2); WRITE_SPECIALREG(vpidr_el2, hypctx->vpidr_el2); WRITE_SPECIALREG(vmpidr_el2, hypctx->vmpidr_el2); @@ -460,10 +479,11 @@ vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest) if (guest) { /* Load the timer registers */ - WRITE_SPECIALREG(cntkctl_el1, hypctx->vtimer_cpu.cntkctl_el1); - WRITE_SPECIALREG(cntv_cval_el0, + WRITE_SPECIALREG(EL1_REG(CNTKCTL), + hypctx->vtimer_cpu.cntkctl_el1); + WRITE_SPECIALREG(EL0_REG(CNTV_CVAL), hypctx->vtimer_cpu.virt_timer.cntx_cval_el0); - WRITE_SPECIALREG(cntv_ctl_el0, + WRITE_SPECIALREG(EL0_REG(CNTV_CTL), hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0); WRITE_SPECIALREG(cnthctl_el2, hyp->vtimer.cnthctl_el2); WRITE_SPECIALREG(cntvoff_el2, hyp->vtimer.cntvoff_el2); @@ -543,7 +563,7 @@ vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx) WRITE_SPECIALREG(mdcr_el2, hypctx->mdcr_el2); /* Call into the guest 
*/ - ret = vmm_enter_guest(hypctx); + ret = VMM_HYP_FUNC(do_call_guest)(hypctx); WRITE_SPECIALREG(mdcr_el2, host_hypctx.mdcr_el2); isb(); @@ -613,8 +633,20 @@ vmm_hyp_call_guest(struct hyp *hyp, struct hypctx *hypctx) return (ret); } -static uint64_t -vmm_hyp_read_reg(uint64_t reg) +VMM_STATIC uint64_t +VMM_HYP_FUNC(enter_guest)(struct hyp *hyp, struct hypctx *hypctx) +{ + uint64_t ret; + + do { + ret = vmm_hyp_call_guest(hyp, hypctx); + } while (ret == EXCP_TYPE_REENTER); + + return (ret); +} + +VMM_STATIC uint64_t +VMM_HYP_FUNC(read_reg)(uint64_t reg) { switch (reg) { case HYP_REG_ICH_VTR: @@ -626,22 +658,27 @@ vmm_hyp_read_reg(uint64_t reg) return (0); } -static int -vmm_clean_s2_tlbi(void) +VMM_STATIC void +VMM_HYP_FUNC(clean_s2_tlbi)(void) { dsb(ishst); __asm __volatile("tlbi alle1is"); dsb(ish); - - return (0); } -static int -vm_s2_tlbi_range(uint64_t vttbr, vm_offset_t sva, vm_size_t eva, +VMM_STATIC void +VMM_HYP_FUNC(s2_tlbi_range)(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva, bool final_only) { uint64_t end, r, start; uint64_t host_vttbr; +#ifdef VMM_VHE + uint64_t host_tcr; +#endif + +#ifdef VMM_VHE + dsb(ishst); +#endif #define TLBI_VA_SHIFT 12 #define TLBI_VA_MASK ((1ul << 44) - 1) @@ -654,6 +691,12 @@ vm_s2_tlbi_range(uint64_t vttbr, vm_offset_t sva, vm_size_t eva, WRITE_SPECIALREG(vttbr_el2, vttbr); isb(); +#ifdef VMM_VHE + host_tcr = READ_SPECIALREG(tcr_el2); + WRITE_SPECIALREG(tcr_el2, host_tcr & ~HCR_TGE); + isb(); +#endif + /* * The CPU can cache the stage 1 + 2 combination so we need to ensure * the stage 2 is invalidated first, then when this has completed we @@ -678,18 +721,25 @@ vm_s2_tlbi_range(uint64_t vttbr, vm_offset_t sva, vm_size_t eva, dsb(ish); isb(); - /* Switch back t othe host vttbr */ - WRITE_SPECIALREG(vttbr_el2, host_vttbr); +#ifdef VMM_VHE + WRITE_SPECIALREG(tcr_el2, host_tcr); isb(); +#endif - return (0); + /* Switch back to the host vttbr */ + WRITE_SPECIALREG(vttbr_el2, host_vttbr); + isb(); } -static int 
-vm_s2_tlbi_all(uint64_t vttbr) +VMM_STATIC void +VMM_HYP_FUNC(s2_tlbi_all)(uint64_t vttbr) { uint64_t host_vttbr; +#ifdef VMM_VHE + dsb(ishst); +#endif + /* Switch to the guest vttbr */ /* TODO: Handle Cortex-A57/A72 erratum 131936 */ host_vttbr = READ_SPECIALREG(vttbr_el2); @@ -703,83 +753,4 @@ vm_s2_tlbi_all(uint64_t vttbr) /* Switch back t othe host vttbr */ WRITE_SPECIALREG(vttbr_el2, host_vttbr); isb(); - - return (0); -} - -static int -vmm_dc_civac(uintptr_t start, uint64_t len) -{ - size_t line_size, end; - uint64_t ctr; - - ctr = READ_SPECIALREG(ctr_el0); - line_size = sizeof(int) << CTR_DLINE_SIZE(ctr); - end = start + len; - dsb(ishst); - /* Clean and Invalidate the D-cache */ - for (; start < end; start += line_size) - __asm __volatile("dc civac, %0" - : - : ASM_PTR_CONSTR (start) - : "memory"); - dsb(ish); - return (0); -} - -static int -vmm_el2_tlbi(uint64_t type, uint64_t start, uint64_t len) -{ - uint64_t end, r; - - dsb(ishst); - switch (type) { - default: - case HYP_EL2_TLBI_ALL: - __asm __volatile("tlbi alle2" ::: "memory"); - break; - case HYP_EL2_TLBI_VA: - end = TLBI_VA(start + len); - start = TLBI_VA(start); - for (r = start; r < end; r += TLBI_VA_L3_INCR) { - __asm __volatile("tlbi vae2is, %0" :: "r"(r)); - } - break; - } - dsb(ish); - - return (0); -} - -uint64_t -vmm_hyp_enter(uint64_t handle, uintptr_t x1, uintptr_t x2, uintptr_t x3, - uintptr_t x4, uintptr_t x5, uintptr_t x6, uintptr_t x7) -{ - uint64_t ret; - - switch (handle) { - case HYP_ENTER_GUEST: - do { - ret = vmm_hyp_call_guest((struct hyp *)x1, - (struct hypctx *)x2); - } while (ret == EXCP_TYPE_REENTER); - return (ret); - case HYP_READ_REGISTER: - return (vmm_hyp_read_reg(x1)); - case HYP_CLEAN_S2_TLBI: - return (vmm_clean_s2_tlbi()); - case HYP_DC_CIVAC: - return (vmm_dc_civac(x1, x2)); - case HYP_EL2_TLBI: - return (vmm_el2_tlbi(x1, x2, x3)); - case HYP_S2_TLBI_RANGE: - return (vm_s2_tlbi_range(x1, x2, x3, x4)); - case HYP_S2_TLBI_ALL: - return (vm_s2_tlbi_all(x1)); - case 
HYP_CLEANUP: /* Handled in vmm_hyp_exception.S */ - default: - break; - } - - return (0); } diff --git a/sys/arm64/vmm/vmm_hyp_exception.S b/sys/arm64/vmm/vmm_hyp_exception.S index 1d864df2b912..38b8e18e02ce 100644 --- a/sys/arm64/vmm/vmm_hyp_exception.S +++ b/sys/arm64/vmm/vmm_hyp_exception.S @@ -30,6 +30,7 @@ */ +#include #include #include @@ -145,29 +146,6 @@ b handle_\name .endm - .section ".vmm_vectors","ax" - .align 11 -hyp_init_vectors: - vempty /* Synchronous EL2t */ - vempty /* IRQ EL2t */ - vempty /* FIQ EL2t */ - vempty /* Error EL2t */ - - vempty /* Synchronous EL2h */ - vempty /* IRQ EL2h */ - vempty /* FIQ EL2h */ - vempty /* Error EL2h */ - - vector hyp_init /* Synchronous 64-bit EL1 */ - vempty /* IRQ 64-bit EL1 */ - vempty /* FIQ 64-bit EL1 */ - vempty /* Error 64-bit EL1 */ - - vempty /* Synchronous 32-bit EL1 */ - vempty /* IRQ 32-bit EL1 */ - vempty /* FIQ 32-bit EL1 */ - vempty /* Error 32-bit EL1 */ - .text .align 11 hyp_vectors: @@ -191,57 +169,6 @@ hyp_vectors: vempty /* FIQ 32-bit EL1 */ vempty /* Error 32-bit EL1 */ -/* - * Initialize the hypervisor mode with a new exception vector table, translation - * table and stack. 
- * - * Expecting: - * x0 - translation tables physical address - * x1 - stack top virtual address - * x2 - TCR_EL2 value - * x3 - SCTLR_EL2 value - * x4 - VTCR_EL2 value - */ -LENTRY(handle_hyp_init) - /* Install the new exception vectors */ - adrp PTR(6), hyp_vectors - add PTR(6), PTR(6), :lo12:hyp_vectors -#if __has_feature(capabilities) -#ifndef __CHERI_PURE_CAPABILITY__ - cvtp c6, x6 -#endif - msr cvbar_el2, c6 -#else - msr vbar_el2, x6 -#endif - /* Set the stack top address */ - mov PTRN(sp), PTR(1) - /* Use the host VTTBR_EL2 to tell the host and the guests apart */ - mov x9, #VTTBR_HOST - msr vttbr_el2, x9 - /* Load the base address for the translation tables */ - msr ttbr0_el2, x0 - /* Invalidate the TLB */ - dsb ish - tlbi alle2 - dsb ishst - isb - /* Use the same memory attributes as EL1 */ - mrs x9, mair_el1 - msr mair_el2, x9 - /* Configure address translation */ - msr tcr_el2, x2 - isb - /* Set the system control register for EL2 */ - msr sctlr_el2, x3 - /* Set the Stage 2 translation control register */ - msr vtcr_el2, x4 - /* Return success */ - mov x0, #0 - /* MMU is up and running */ - ERET -LEND(handle_hyp_init) - .macro do_world_switch_to_host save_guest_registers restore_host_registers @@ -249,10 +176,19 @@ LEND(handle_hyp_init) /* Restore host VTTBR */ mov x9, #VTTBR_HOST msr vttbr_el2, x9 + +#ifdef VMM_VHE +#if __has_feature(capabilities) + msr cvbar_el1, c1 +#else + msr vbar_el1, x1 +#endif +#endif .endm .macro handle_el2_excp type +#ifndef VMM_VHE /* Save registers before modifying so we can restore them */ str CAP(9), [PTRN(sp), #-16]! 
@@ -263,15 +199,18 @@ LEND(handle_hyp_init) /* We got the exception while the guest was running */ ldr CAP(9), [PTRN(sp)], #16 +#endif /* !VMM_VHE */ do_world_switch_to_host mov x0, \type ret +#ifndef VMM_VHE 1: /* We got the exception while the host was running */ ldr CAP(9), [PTRN(sp)], #16 mov x0, \type ERET +#endif /* !VMM_VHE */ .endm @@ -292,6 +231,7 @@ LENTRY(handle_el2_el2h_error) LEND(handle_el2_el2h_error) LENTRY(handle_el2_el1_sync64) +#ifndef VMM_VHE /* Save registers before modifying so we can restore them */ str CAP(9), [PTRN(sp), #-16]! /* Check for host hypervisor call */ @@ -313,7 +253,9 @@ LENTRY(handle_el2_el1_sync64) ldr CAPN(lr), [PTRN(sp)], #16 ERET -1: /* Guest exception taken to EL2 */ +1: +#endif + /* Guest exception taken to EL2 */ do_world_switch_to_host mov x0, #EXCP_TYPE_EL1_SYNC ret @@ -354,12 +296,31 @@ LEND(handle_el2_el1_error64) /* * Usage: - * uint64_t vmm_enter_guest(struct hypctx *hypctx) + * uint64_t vmm_do_call_guest(struct hypctx *hypctx) * * Expecting: * x0 - hypctx address */ -ENTRY(vmm_enter_guest) +ENTRY(VMM_HYP_FUNC(do_call_guest)) +#ifdef VMM_VHE +#if __has_feature(capabilities) + mrs c1, cvbar_el1 +#else + mrs x1, vbar_el1 +#endif + adrp PTR(2), hyp_vectors + add PTR(2), PTR(2), :lo12:hyp_vectors +#if __has_feature(capabilities) +#ifndef __CHERI_PURE_CAPABILITY__ + cvtp c2, x2 +#endif + msr cvbar_el1, c2 +#else + msr vbar_el1, x2 +#endif + isb +#endif + /* Save hypctx address */ msr PTRN(tpidr_el2), PTR(0) @@ -368,32 +329,6 @@ ENTRY(vmm_enter_guest) /* Enter guest */ ERET -END(vmm_enter_guest) - -/* - * Usage: - * void vmm_cleanup(uint64_t handle, void *hyp_stub_vectors) - * - * Expecting: - * x1 - physical address of hyp_stub_vectors - */ -LENTRY(vmm_cleanup) - /* Restore the stub vectors */ -#if __has_feature(capabilities) -#ifndef __CHERI_PURE_CAPABILITY__ - cvtp c1, x1 -#endif - msr cvbar_el2, c1 -#else - msr vbar_el2, x1 -#endif +END(VMM_HYP_FUNC(do_call_guest)) - /* Disable the MMU */ - dsb sy - mrs x2, sctlr_el2 
- bic x2, x2, #SCTLR_EL2_M - msr sctlr_el2, x2 - isb - - ERET -LEND(vmm_cleanup) +GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL) diff --git a/sys/arm64/vmm/vmm_nvhe.c b/sys/arm64/vmm/vmm_nvhe.c new file mode 100644 index 000000000000..52824e11be45 --- /dev/null +++ b/sys/arm64/vmm/vmm_nvhe.c @@ -0,0 +1,121 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2021 Andrew Turner + * Copyright (c) 2024 Arm Ltd + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#define VMM_STATIC static +#define VMM_HYP_FUNC(func) vmm_nvhe_ ## func + +#define guest_or_nonvhe(guest) (true) +#define EL1_REG(reg) MRS_REG_ALT_NAME(reg ## _EL1) +#define EL0_REG(reg) MRS_REG_ALT_NAME(reg ## _EL0) + +#include "vmm_hyp.c" + +uint64_t vmm_hyp_enter(uint64_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t); + +/* + * Handlers for EL2 addres space. Only needed by non-VHE code as in VHE the + * kernel is in EL2 so pmap will manage the address space. + */ +static int +vmm_dc_civac(uintptr_t start, uint64_t len) +{ + size_t line_size, end; + uint64_t ctr; + + ctr = READ_SPECIALREG(ctr_el0); + line_size = sizeof(int) << CTR_DLINE_SIZE(ctr); + end = start + len; + dsb(ishst); + /* Clean and Invalidate the D-cache */ + for (; start < end; start += line_size) + __asm __volatile("dc civac, %0" + : + : ASM_PTR_CONSTR (start) + : "memory"); + dsb(ish); + return (0); +} + +static int +vmm_el2_tlbi(uint64_t type, uint64_t start, uint64_t len) +{ + uint64_t end, r; + + dsb(ishst); + switch (type) { + default: + case HYP_EL2_TLBI_ALL: + __asm __volatile("tlbi alle2" ::: "memory"); + break; + case HYP_EL2_TLBI_VA: + end = TLBI_VA(start + len); + start = TLBI_VA(start); + for (r = start; r < end; r += TLBI_VA_L3_INCR) { + __asm __volatile("tlbi vae2is, %0" :: "r"(r)); + } + break; + } + dsb(ish); + + return (0); +} + +uint64_t +vmm_hyp_enter(uint64_t handle, uintptr_t x1, uintptr_t x2, uintptr_t x3, + uintptr_t x4, uintptr_t x5, uintptr_t x6, uintptr_t x7) +{ + switch (handle) { + case HYP_ENTER_GUEST: + return (VMM_HYP_FUNC(enter_guest)((struct hyp *)x1, + (struct hypctx *)x2)); + case HYP_READ_REGISTER: + return (VMM_HYP_FUNC(read_reg)(x1)); + case HYP_CLEAN_S2_TLBI: + VMM_HYP_FUNC(clean_s2_tlbi()); + return (0); + case HYP_DC_CIVAC: + return (vmm_dc_civac(x1, x2)); + case HYP_EL2_TLBI: + return (vmm_el2_tlbi(x1, x2, x3)); + case HYP_S2_TLBI_RANGE: + VMM_HYP_FUNC(s2_tlbi_range)(x1, x2, x3, x4); + return (0); + case 
HYP_S2_TLBI_ALL: + VMM_HYP_FUNC(s2_tlbi_all)(x1); + return (0); + case HYP_CLEANUP: /* Handled in vmm_hyp_exception.S */ + default: + break; + } + + return (0); +} diff --git a/sys/arm64/vmm/vmm_nvhe_exception.S b/sys/arm64/vmm/vmm_nvhe_exception.S new file mode 100644 index 000000000000..cb440e0f8d91 --- /dev/null +++ b/sys/arm64/vmm/vmm_nvhe_exception.S @@ -0,0 +1,134 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#define VMM_HYP_FUNC(func) vmm_nvhe_ ## func + +#include "vmm_hyp_exception.S" + + .section ".vmm_vectors","ax" + .align 11 +hyp_init_vectors: + vempty /* Synchronous EL2t */ + vempty /* IRQ EL2t */ + vempty /* FIQ EL2t */ + vempty /* Error EL2t */ + + vempty /* Synchronous EL2h */ + vempty /* IRQ EL2h */ + vempty /* FIQ EL2h */ + vempty /* Error EL2h */ + + vector hyp_init /* Synchronous 64-bit EL1 */ + vempty /* IRQ 64-bit EL1 */ + vempty /* FIQ 64-bit EL1 */ + vempty /* Error 64-bit EL1 */ + + vempty /* Synchronous 32-bit EL1 */ + vempty /* IRQ 32-bit EL1 */ + vempty /* FIQ 32-bit EL1 */ + vempty /* Error 32-bit EL1 */ + + .text + +/* + * Initialize the hypervisor mode with a new exception vector table, translation + * table and stack. + * + * Expecting: + * x0 - translation tables physical address + * x1 - stack top virtual address + * x2 - TCR_EL2 value + * x3 - SCTLR_EL2 value + * x4 - VTCR_EL2 value + */ +LENTRY(handle_hyp_init) + /* Install the new exception vectors */ + adrp PTR(6), hyp_vectors + add PTR(6), PTR(6), :lo12:hyp_vectors +#if __has_feature(capabilities) +#ifndef __CHERI_PURE_CAPABILITY__ + cvtp c6, x6 +#endif + msr cvbar_el2, c6 +#else + msr vbar_el2, x6 +#endif + /* Set the stack top address */ + mov PTRN(sp), PTR(1) + /* Use the host VTTBR_EL2 to tell the host and the guests apart */ + mov x9, #VTTBR_HOST + msr vttbr_el2, x9 + /* Load the base address for the translation tables */ + msr ttbr0_el2, x0 + /* Invalidate the TLB */ + dsb ish + tlbi alle2 + dsb ishst + isb + /* Use the same memory attributes as EL1 */ + mrs x9, mair_el1 + msr mair_el2, x9 + /* Configure address translation */ + msr tcr_el2, x2 + isb + /* Set the system control register for EL2 */ + msr sctlr_el2, x3 + /* Set the Stage 2 translation control register */ + msr vtcr_el2, x4 + /* Return success */ + mov x0, #0 + /* MMU is up and running */ + ERET +LEND(handle_hyp_init) + +/* + * Usage: + * void vmm_cleanup(uint64_t handle, void *hyp_stub_vectors) + * + * 
Expecting: + * x1 - physical address of hyp_stub_vectors + */ +LENTRY(vmm_cleanup) + /* Restore the stub vectors */ +#if __has_feature(capabilities) +#ifndef __CHERI_PURE_CAPABILITY__ + cvtp c1, x1 +#endif + msr cvbar_el2, c1 +#else + msr vbar_el2, x1 +#endif + + /* Disable the MMU */ + dsb sy + mrs x2, sctlr_el2 + bic x2, x2, #SCTLR_EL2_M + msr sctlr_el2, x2 + isb + + ERET +LEND(vmm_cleanup) diff --git a/sys/arm64/vmm/vmm_reset.c b/sys/arm64/vmm/vmm_reset.c index 75aa16c81727..9946e2baae95 100644 --- a/sys/arm64/vmm/vmm_reset.c +++ b/sys/arm64/vmm/vmm_reset.c @@ -70,6 +70,9 @@ reset_vm_el01_regs(void *vcpu) set_arch_unknown(el2ctx->mdccint_el1); set_arch_unknown(el2ctx->mdscr_el1); set_arch_unknown(el2ctx->par_el1); +#if __has_feature(capabilities) + set_arch_unknown(el2ctx->cctlr_el1); +#endif /* * Guest starts with: @@ -144,6 +147,9 @@ reset_vm_el2_regs(void *vcpu) */ el2ctx->hcr_el2 = HCR_RW | HCR_TID3 | HCR_TWI | HCR_BSU_IS | HCR_FB | HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO | HCR_VM; + if (in_vhe()) { + el2ctx->hcr_el2 |= HCR_E2H; + } /* TODO: Trap all extensions we don't support */ el2ctx->mdcr_el2 = 0; @@ -174,7 +180,18 @@ reset_vm_el2_regs(void *vcpu) * Don't trap accesses to CPACR_EL1, trace, SVE, Advanced SIMD * and floating point functionality to EL2. */ - el2ctx->cptr_el2 = CPTR_RES1; + if (in_vhe()) + el2ctx->cptr_el2 = CPTR_E2H_TRAP_ALL | CPTR_E2H_FPEN; + else + el2ctx->cptr_el2 = CPTR_TRAP_ALL & ~CPTR_TFP; +#if __has_feature(capabilities) + /* Don't trap accesses to capability registers. */ + if (in_vhe()) + el2ctx->cptr_el2 |= CPTR_E2H_CEN; + else + el2ctx->cptr_el2 &= ~CPTR_TC; +#endif + el2ctx->cptr_el2 &= ~CPTR_TCPAC; /* * Disable interrupts in the guest. The guest OS will re-enable * them. 
diff --git a/sys/arm64/vmm/vmm_vhe.c b/sys/arm64/vmm/vmm_vhe.c new file mode 100644 index 000000000000..8a12852e2a7a --- /dev/null +++ b/sys/arm64/vmm/vmm_vhe.c @@ -0,0 +1,39 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "vmm_handlers.h" + +#define VMM_VHE + +#define VMM_STATIC +#define VMM_HYP_FUNC(func) vmm_vhe_ ## func + +#define guest_or_nonvhe(guest) (guest) +#define EL1_REG(reg) MRS_REG_ALT_NAME(reg ## _EL12) +#define EL0_REG(reg) MRS_REG_ALT_NAME(reg ## _EL02) + +#include "vmm_hyp.c" diff --git a/sys/arm64/vmm/vmm_vhe_exception.S b/sys/arm64/vmm/vmm_vhe_exception.S new file mode 100644 index 000000000000..286f5df03707 --- /dev/null +++ b/sys/arm64/vmm/vmm_vhe_exception.S @@ -0,0 +1,31 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Arm Ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#define VMM_VHE +#define VMM_HYP_FUNC(func) vmm_vhe_ ## func + +#include "vmm_hyp_exception.S" diff --git a/sys/compat/freebsd32/syscalls.conf b/sys/compat/freebsd32/syscalls.conf index 1f4b62cfa1c3..fddf56fb0ac6 100644 --- a/sys/compat/freebsd32/syscalls.conf +++ b/sys/compat/freebsd32/syscalls.conf @@ -19,10 +19,10 @@ abi_ptr_array_t="uint32_t" abi_headers="#include " # -# Variables below this line are exceptions to the ABI changes -# programmatically detected by sys/tools/syscalls. New system calls -# should not require an entry here in virtually all cases. New entries -# are almost certainly representative of badly designed interfaces. +# Variables below this line are exceptions to the ABI changes programmatically +# detected by makesyscalls.lua. New system calls should not require an entry +# here in nearly virtually all cases. New entries are almost certainly +# representative of badly designed interfaces. # # System calls that require freebsd32-specific handling: @@ -52,6 +52,6 @@ obsol="getkerninfo" # nlm_syscall - requires significant porting, probably doesn't make sense # nnpfs_syscall - requires significant porting, probably doesn't make sense # ntp_gettime - should be implemented -# thr_create - was unimplemented and appears to be unnecessicary +# thr_create - was unimplemented and appears to be unnecessary # cheri_revoke* - Don't implement CheriABI-specific syscalls unimpl="afs3_syscall kldsym __mac_get_proc __mac_set_proc __mac_get_fd __mac_get_file __mac_set_fd __mac_set_file __mac_get_pid __mac_get_link __mac_set_link __mac_execve nfssvc nlm_syscall ntp_gettime lgetfh nnpfs_syscall thr_create cheri_revoke_get_shadow cheri_revoke cheri_cidcap_alloc" diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index 0fe1dc6a224a..da51c18cc062 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -444,7 +444,7 @@ lkpi_unmap_mapping_range(void *obj, 
loff_t const holebegin __unused, continue; if (!vm_page_busy_acquire(page, VM_ALLOC_WAITFAIL)) goto retry; - cdev_pager_free_page(devobj, page); + cdev_mgtdev_pager_free_page(devobj, page); } VM_OBJECT_WUNLOCK(devobj); vm_object_deallocate(devobj); diff --git a/sys/conf/NOTES b/sys/conf/NOTES index a531620ede55..47d6bcb4e5da 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -977,6 +977,9 @@ device lagg # WireGuard interface. device wg +# dummymbuf – mbuf alteration pfil hooks +device dummymbuf + # # Internet family options: # diff --git a/sys/conf/files b/sys/conf/files index 860ce16784ab..1ce6e07a7697 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3933,7 +3933,7 @@ kern/kern_idle.c standard kern/kern_intr.c standard kern/kern_jail.c standard kern/kern_kcov.c optional kcov \ - compile-with "${NORMAL_C:N-fsanitize*} ${NORMAL_C:M-fsanitize=kernel-memory}" + compile-with "${NOSAN_C} ${MSAN_CFLAGS}" kern/kern_khelp.c standard kern/kern_kthread.c standard kern/kern_ktr.c optional ktr @@ -4001,7 +4001,7 @@ kern/stack_protector.c standard \ kern/subr_acl_nfs4.c optional ufs_acl | zfs kern/subr_acl_posix1e.c optional ufs_acl kern/subr_asan.c optional kasan \ - compile-with "${NORMAL_C:N-fsanitize*:N-fstack-protector*}" + compile-with "${NOSAN_C:N-fstack-protector*}" kern/subr_autoconf.c standard kern/subr_blist.c standard kern/subr_boot.c standard @@ -4013,10 +4013,10 @@ kern/subr_clock.c standard kern/subr_compressor.c standard \ compile-with "${NORMAL_C} -I$S/contrib/zstd/lib/freebsd" kern/subr_coverage.c optional coverage \ - compile-with "${NORMAL_C:N-fsanitize*:N-fno-sanitize*}" + compile-with "${NOSAN_C}" kern/subr_counter.c standard kern/subr_csan.c optional kcsan \ - compile-with "${NORMAL_C:N-fsanitize*:N-fstack-protector*}" + compile-with "${NOSAN_C:N-fstack-protector*}" kern/subr_devstat.c standard kern/subr_disk.c standard kern/subr_early.c standard @@ -4036,7 +4036,7 @@ kern/subr_mchain.c optional libmchain kern/subr_memdesc.c standard 
kern/subr_module.c standard kern/subr_msan.c optional kmsan \ - compile-with "${NORMAL_C:N-fsanitize*:N-fno-sanitize*:N-fstack-protector*}" + compile-with "${NOSAN_C:N-fstack-protector*}" kern/subr_msgbuf.c standard kern/subr_param.c standard kern/subr_pcpu.c standard diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 644239d973bc..f210da5c4ee0 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -94,7 +94,7 @@ arm64/arm64/trap.c standard arm64/arm64/uio_machdep.c standard arm64/arm64/undefined.c standard arm64/arm64/unwind.c optional ddb | kdtrace_hooks | stack \ - compile-with "${NORMAL_C:N-fsanitize*:N-fno-sanitize*}" + compile-with "${NOSAN_C}" arm64/arm64/vfp.c standard arm64/arm64/vm_machdep.c standard @@ -177,15 +177,16 @@ arm64/vmm/vmm_instruction_emul.c optional vmm arm64/vmm/vmm_stat.c optional vmm arm64/vmm/vmm_arm64.c optional vmm arm64/vmm/vmm_reset.c optional vmm +arm64/vmm/vmm_handlers.c optional vmm arm64/vmm/vmm_call.S optional vmm -arm64/vmm/vmm_hyp_exception.S optional vmm \ - compile-with "${NORMAL_C:N-fsanitize*:N-fno-sanitize*:N-mbranch-protection*} -fpie" \ +arm64/vmm/vmm_nvhe_exception.S optional vmm \ + compile-with "${NOSAN_C} -fpie" \ no-obj -arm64/vmm/vmm_hyp.c optional vmm \ - compile-with "${NORMAL_C:N-fsanitize*:N-fno-sanitize*:N-mbranch-protection*} -fpie" \ +arm64/vmm/vmm_nvhe.c optional vmm \ + compile-with "${NOSAN_C} -fpie" \ no-obj vmm_hyp_blob.elf.full optional vmm \ - dependency "vmm_hyp.o vmm_hyp_exception.o" \ + dependency "vmm_nvhe.o vmm_nvhe_exception.o" \ compile-with "${SYSTEM_LD_BASECMD} -o ${.TARGET} ${.ALLSRC} --defsym=_start='0x0' --defsym=text_start='0x0'" \ no-obj no-implicit-rule vmm_hyp_blob.elf optional vmm \ @@ -199,6 +200,8 @@ vmm_hyp_blob.bin optional vmm \ arm64/vmm/vmm_hyp_el2.S optional vmm \ dependency vmm_hyp_blob.bin arm64/vmm/vmm_mmu.c optional vmm +arm64/vmm/vmm_vhe.c optional vmm +arm64/vmm/vmm_vhe_exception.S optional vmm arm64/vmm/io/vgic.c optional vmm arm64/vmm/io/vgic_v3.c 
optional vmm arm64/vmm/io/vgic_if.m optional vmm diff --git a/sys/conf/kern.mk b/sys/conf/kern.mk index c93c39171898..e1a276d81a63 100644 --- a/sys/conf/kern.mk +++ b/sys/conf/kern.mk @@ -143,6 +143,9 @@ CFLAGS += -mgeneral-regs-only CFLAGS += -ffixed-x18 # Build with BTI+PAC CFLAGS += -mbranch-protection=standard +.if ${LINKER_FEATURES:Mbti-report} +LDFLAGS += -Wl,-zbti-report=error +.endif # TODO: support outline atomics CFLAGS += -mno-outline-atomics INLINE_LIMIT?= 8000 @@ -339,6 +342,8 @@ CSTD?= gnu99 CFLAGS+= -std=${CSTD} .endif # CSTD +NOSAN_CFLAGS= ${CFLAGS:N-fsanitize*:N-fno-sanitize*:N-fasan-shadow-offset*} + # Please keep this if in sync with bsd.sys.mk .if ${LD} != "ld" && (${CC:[1]:H} != ${LD:[1]:H} || ${LD:[1]:T} != "ld") # Add -fuse-ld=${LD} if $LD is in a different directory or not called "ld". diff --git a/sys/conf/kern.post.mk b/sys/conf/kern.post.mk index b0442ca6aa90..d45275cf452c 100644 --- a/sys/conf/kern.post.mk +++ b/sys/conf/kern.post.mk @@ -245,21 +245,21 @@ offset.inc: $S/kern/genoffset.sh genoffset.o NM='${NM}' NMFLAGS='${NMFLAGS}' sh $S/kern/genoffset.sh genoffset.o > ${.TARGET} genoffset.o: $S/kern/genoffset.c - ${CC} -c ${CFLAGS:N-flto*:N-fno-common:N-fsanitize*:N-fno-sanitize*} \ + ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} \ -fcommon $S/kern/genoffset.c # genoffset_test.o is not actually used for anything - the point of compiling it # is to exercise the CTASSERT that checks that the offsets in the offset.inc # _lite struct(s) match those in the original(s). 
genoffset_test.o: $S/kern/genoffset.c offset.inc - ${CC} -c ${CFLAGS:N-flto*:N-fno-common:N-fsanitize*:N-fno-sanitize*} \ + ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} \ -fcommon -DOFFSET_TEST $S/kern/genoffset.c -o ${.TARGET} assym.inc: $S/kern/genassym.sh genassym.o genoffset_test.o NM='${NM}' NMFLAGS='${NMFLAGS}' sh $S/kern/genassym.sh genassym.o > ${.TARGET} genassym.o: $S/$M/$M/genassym.c offset.inc - ${CC} -c ${CFLAGS:N-flto*:N-fno-common:N-fsanitize*:N-fno-sanitize*} \ + ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} \ -fcommon $S/$M/$M/genassym.c OBJS_DEPEND_GUESS+= opt_global.h diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk index 88bf3a42a3af..49adca6944be 100644 --- a/sys/conf/kern.pre.mk +++ b/sys/conf/kern.pre.mk @@ -17,6 +17,7 @@ _srcconf_included_: .include .include .include "kern.opts.mk" +.-include # The kernel build always occurs in the object directory which is .CURDIR. .if ${.MAKE.MODE:Unormal:Mmeta} @@ -129,11 +130,12 @@ KMSAN_ENABLED!= grep KMSAN opt_global.h || true ; echo .if !empty(KMSAN_ENABLED) # Disable -fno-sanitize-memory-param-retval until interceptors have been # updated to work properly with it. -SAN_CFLAGS+= -DSAN_NEEDS_INTERCEPTORS -DSAN_INTERCEPTOR_PREFIX=kmsan \ +MSAN_CFLAGS+= -DSAN_NEEDS_INTERCEPTORS -DSAN_INTERCEPTOR_PREFIX=kmsan \ -fsanitize=kernel-memory .if ${COMPILER_TYPE} == "clang" && ${COMPILER_VERSION} >= 160000 -SAN_CFLAGS+= -fno-sanitize-memory-param-retval +MSAN_CFLAGS+= -fno-sanitize-memory-param-retval .endif +SAN_CFLAGS+= ${MSAN_CFLAGS} .endif KUBSAN_ENABLED!= grep KUBSAN opt_global.h || true ; echo @@ -212,6 +214,10 @@ NORMAL_FWO= ${CC:N${CCACHE_BIN}} -c ${ASM_CFLAGS} ${WERROR} -o ${.TARGET} \ $S/kern/firmw.S -DFIRMW_FILE=\""${.ALLSRC:M*.fw}"\" \ -DFIRMW_SYMBOL="${.ALLSRC:M*.fw:C/[-.\/]/_/g}" +# Remove sanitizer arguments. Some -fno-sanitize* and -fasan-shadow-offset* +# arguments become an error if the appropriate sanitizer is not enabled. 
+NOSAN_C= ${NORMAL_C:N-fsanitize*:N-fno-sanitize*:N-fasan-shadow-offset*} + # for ZSTD in the kernel (include zstd/lib/freebsd before other CFLAGS) ZSTD_C= ${CC} -c -DZSTD_HEAPMODE=1 -I$S/contrib/zstd/lib/freebsd ${CFLAGS} \ -I$S/contrib/zstd/lib -I$S/contrib/zstd/lib/common ${WERROR} \ diff --git a/sys/conf/kmod.mk b/sys/conf/kmod.mk index d674bd06a4ad..b2f5c7447d3e 100644 --- a/sys/conf/kmod.mk +++ b/sys/conf/kmod.mk @@ -526,13 +526,13 @@ assym.inc: ${SYSDIR}/kern/genassym.sh sh ${SYSDIR}/kern/genassym.sh genassym.o > ${.TARGET} genassym.o: ${SYSDIR}/${MACHINE}/${MACHINE}/genassym.c offset.inc genassym.o: ${SRCS:Mopt_*.h} - ${CC} -c ${CFLAGS:N-flto*:N-fno-common:N-fsanitize*:N-fno-sanitize*} -fcommon \ + ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon \ ${SYSDIR}/${MACHINE}/${MACHINE}/genassym.c offset.inc: ${SYSDIR}/kern/genoffset.sh genoffset.o sh ${SYSDIR}/kern/genoffset.sh genoffset.o > ${.TARGET} genoffset.o: ${SYSDIR}/kern/genoffset.c genoffset.o: ${SRCS:Mopt_*.h} - ${CC} -c ${CFLAGS:N-flto*:N-fno-common:N-fsanitize*:N-fno-sanitize*} -fcommon \ + ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon \ ${SYSDIR}/kern/genoffset.c CLEANDEPENDFILES+= ${_ILINKS} diff --git a/sys/dev/amdsmn/amdsmn.c b/sys/dev/amdsmn/amdsmn.c index cb2ddbd86c2e..9a0428608a27 100644 --- a/sys/dev/amdsmn/amdsmn.c +++ b/sys/dev/amdsmn/amdsmn.c @@ -60,6 +60,7 @@ #define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 #define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4 #define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8 +#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8 struct pciid; struct amdsmn_softc { @@ -115,6 +116,12 @@ static const struct pciid { .amdsmn_addr_reg = F17H_SMN_ADDR_REG, .amdsmn_data_reg = F17H_SMN_DATA_REG, }, + { + .amdsmn_vendorid = CPU_VENDOR_AMD, + .amdsmn_deviceid = PCI_DEVICE_ID_AMD_19H_M70H_ROOT, + .amdsmn_addr_reg = F17H_SMN_ADDR_REG, + .amdsmn_data_reg = F17H_SMN_DATA_REG, + }, }; /* diff --git a/sys/dev/amdtemp/amdtemp.c b/sys/dev/amdtemp/amdtemp.c index 
9ff7388fd70c..ff9866c6221b 100644 --- a/sys/dev/amdtemp/amdtemp.c +++ b/sys/dev/amdtemp/amdtemp.c @@ -115,6 +115,7 @@ struct amdtemp_softc { #define DEVICEID_AMD_HOSTB17H_M60H_ROOT 0x1630 #define DEVICEID_AMD_HOSTB19H_M10H_ROOT 0x14a4 #define DEVICEID_AMD_HOSTB19H_M60H_ROOT 0x14d8 +#define DEVICEID_AMD_HOSTB19H_M70H_ROOT 0x14e8 static const struct amdtemp_product { uint16_t amdtemp_vendorid; @@ -141,6 +142,7 @@ static const struct amdtemp_product { { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M60H_ROOT, false }, { VENDORID_AMD, DEVICEID_AMD_HOSTB19H_M10H_ROOT, false }, { VENDORID_AMD, DEVICEID_AMD_HOSTB19H_M60H_ROOT, false }, + { VENDORID_AMD, DEVICEID_AMD_HOSTB19H_M70H_ROOT, false }, }; /* @@ -873,6 +875,7 @@ amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model) _Static_assert((int)NUM_CCDS >= 12, ""); break; case 0x60 ... 0x6f: /* Zen4 Ryzen "Raphael" */ + case 0x70 ... 0x7f: /* Zen4 Ryzen "Phoenix" */ sc->sc_temp_base = AMDTEMP_ZEN4_CCD_TMP_BASE; maxreg = 8; _Static_assert((int)NUM_CCDS >= 8, ""); diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index ba2287c7b059..53449d15d52e 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -1561,7 +1561,10 @@ t4_wrq_tx(struct adapter *sc, struct wrqe *wr) struct sge_wrq *wrq = wr->wrq; TXQ_LOCK(wrq); - t4_wrq_tx_locked(sc, wrq, wr); + if (__predict_true(wrq->eq.flags & EQ_HW_ALLOCATED)) + t4_wrq_tx_locked(sc, wrq, wr); + else + free(wr, M_CXGBE); TXQ_UNLOCK(wrq); } diff --git a/sys/dev/cxgbe/iw_cxgbe/device.c b/sys/dev/cxgbe/iw_cxgbe/device.c index 279bdb20d511..209d12767a1c 100644 --- a/sys/dev/cxgbe/iw_cxgbe/device.c +++ b/sys/dev/cxgbe/iw_cxgbe/device.c @@ -284,7 +284,7 @@ c4iw_activate(struct adapter *sc) } if (uld_active(sc, ULD_IWARP)) { - KASSERT(0, ("%s: RDMA already eanbled on sc %p", __func__, sc)); + KASSERT(0, ("%s: RDMA already enabled on sc %p", __func__, sc)); return (0); } diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index c4bf3a6aa302..349c86f7cf6e 100644 --- 
a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -2060,7 +2060,9 @@ stop_lld(struct adapter *sc) } #if defined(TCP_OFFLOAD) || defined(RATELIMIT) for_each_ofld_txq(vi, k, ofld_txq) { + TXQ_LOCK(&ofld_txq->wrq); ofld_txq->wrq.eq.flags &= ~EQ_HW_ALLOCATED; + TXQ_UNLOCK(&ofld_txq->wrq); } #endif for_each_rxq(vi, k, rxq) { @@ -2078,7 +2080,9 @@ stop_lld(struct adapter *sc) if (sc->flags & FULL_INIT_DONE) { /* Control queue */ wrq = &sc->sge.ctrlq[i]; + TXQ_LOCK(wrq); wrq->eq.flags &= ~EQ_HW_ALLOCATED; + TXQ_UNLOCK(wrq); quiesce_wrq(wrq); } } @@ -2530,6 +2534,15 @@ reset_adapter_with_pl_rst(struct adapter *sc) return (0); } +static inline int +reset_adapter(struct adapter *sc) +{ + if (vm_guest == 0) + return (reset_adapter_with_pci_bus_reset(sc)); + else + return (reset_adapter_with_pl_rst(sc)); +} + static void reset_adapter_task(void *arg, int pending) { @@ -2540,10 +2553,7 @@ reset_adapter_task(void *arg, int pending) if (pending > 1) CH_ALERT(sc, "%s: pending %d\n", __func__, pending); - if (vm_guest == 0) - rc = reset_adapter_with_pci_bus_reset(sc); - else - rc = reset_adapter_with_pl_rst(sc); + rc = reset_adapter(sc); if (rc != 0) { CH_ERR(sc, "adapter did not reset properly, rc = %d, " "flags 0x%08x -> 0x%08x, err_flags 0x%08x -> 0x%08x.\n", @@ -3647,7 +3657,7 @@ fatal_error_task(void *arg, int pending) if (t4_reset_on_fatal_err) { CH_ALERT(sc, "resetting adapter after fatal error.\n"); - rc = reset_adapter_with_pci_bus_reset(sc); + rc = reset_adapter(sc); if (rc == 0 && t4_panic_on_fatal_err) { CH_ALERT(sc, "reset was successful, " "system will NOT panic.\n"); @@ -7048,8 +7058,22 @@ quiesce_txq(struct sge_txq *txq) static void quiesce_wrq(struct sge_wrq *wrq) { + struct wrqe *wr; - /* XXXTX */ + TXQ_LOCK(wrq); + while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) { + STAILQ_REMOVE_HEAD(&wrq->wr_list, link); +#ifdef INVARIANTS + wrq->nwr_pending--; + wrq->ndesc_needed -= howmany(wr->wr_len, EQ_ESIZE); +#endif + free(wr, M_CXGBE); + } + 
MPASS(wrq->nwr_pending == 0); + MPASS(wrq->ndesc_needed == 0); + wrq->nwr_pending = 0; + wrq->ndesc_needed = 0; + TXQ_UNLOCK(wrq); } static void diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index b4eb0701821a..bc81a0251deb 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -2921,6 +2921,10 @@ start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); EQ_LOCK(eq); + if (__predict_false((eq->flags & EQ_HW_ALLOCATED) == 0)) { + EQ_UNLOCK(eq); + return (NULL); + } if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) drain_wrq_wr_list(sc, wrq); @@ -3016,7 +3020,10 @@ commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) F_FW_WR_EQUEQ); } - ring_eq_db(wrq->adapter, eq, ndesc); + if (__predict_true(eq->flags & EQ_HW_ALLOCATED)) + ring_eq_db(wrq->adapter, eq, ndesc); + else + IDXINCR(eq->dbidx, ndesc, eq->sidx); } else { MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); next->pidx = pidx; @@ -3852,6 +3859,8 @@ alloc_ctrlq(struct adapter *sc, int idx) if (!(ctrlq->eq.flags & EQ_HW_ALLOCATED)) { MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED); + MPASS(ctrlq->nwr_pending == 0); + MPASS(ctrlq->ndesc_needed == 0); rc = alloc_eq_hwq(sc, NULL, &ctrlq->eq); if (rc != 0) { @@ -4554,6 +4563,7 @@ free_wrq(struct adapter *sc, struct sge_wrq *wrq) { free_eq(sc, &wrq->eq); MPASS(wrq->nwr_pending == 0); + MPASS(wrq->ndesc_needed == 0); MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); MPASS(STAILQ_EMPTY(&wrq->wr_list)); bzero(wrq, sizeof(*wrq)); @@ -4848,6 +4858,9 @@ alloc_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq, int idx) } if (!(eq->flags & EQ_HW_ALLOCATED)) { + MPASS(eq->flags & EQ_SW_ALLOCATED); + MPASS(ofld_txq->wrq.nwr_pending == 0); + MPASS(ofld_txq->wrq.ndesc_needed == 0); rc = alloc_eq_hwq(sc, vi, eq); if (rc != 0) { CH_ERR(vi, "failed to create hw ofld_txq%d: %d\n", idx, diff --git a/sys/dev/drm2/ttm/ttm_bo_vm.c b/sys/dev/drm2/ttm/ttm_bo_vm.c 
index 4f6c66382453..e543b8dfb993 100644 --- a/sys/dev/drm2/ttm/ttm_bo_vm.c +++ b/sys/dev/drm2/ttm/ttm_bo_vm.c @@ -376,7 +376,7 @@ ttm_bo_release_mmap(struct ttm_buffer_object *bo) continue; if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) goto retry; - cdev_pager_free_page(vm_obj, m); + cdev_mgtdev_pager_free_page(vm_obj, m); } VM_OBJECT_WUNLOCK(vm_obj); diff --git a/sys/dev/md/embedfs.S b/sys/dev/md/embedfs.S index 33f37cd04ae1..033c73391938 100644 --- a/sys/dev/md/embedfs.S +++ b/sys/dev/md/embedfs.S @@ -42,3 +42,9 @@ mfs_root: .type mfs_root_end, %object mfs_root_end: .size mfs_root_end, . - mfs_root_end + +#if defined(__aarch64__) +#include +#include +GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL) +#endif diff --git a/sys/dev/mlx5/mlx5_accel/ipsec.h b/sys/dev/mlx5/mlx5_accel/ipsec.h index 1658542fc9c6..95742c4099f1 100644 --- a/sys/dev/mlx5/mlx5_accel/ipsec.h +++ b/sys/dev/mlx5/mlx5_accel/ipsec.h @@ -37,6 +37,8 @@ #define MLX5E_IPSEC_SADB_RX_BITS 10 #define MLX5_IPSEC_METADATA_MARKER(ipsec_metadata) ((ipsec_metadata >> 31) & 0x1) +#define VLAN_NONE 0xfff + struct mlx5e_priv; struct mlx5e_tx_wqe; struct mlx5e_ipsec_tx; @@ -135,6 +137,7 @@ struct mlx5e_ipsec_rule { struct mlx5_flow_handle *rule; struct mlx5_flow_handle *kspi_rule; struct mlx5_flow_handle *reqid_rule; + struct mlx5_flow_handle *vid_zero_rule; struct mlx5_modify_hdr *modify_hdr; struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_fc *fc; @@ -149,6 +152,7 @@ struct mlx5e_ipsec_esn_state { struct mlx5e_ipsec_sa_entry { struct secasvar *savp; if_t ifp; + if_t ifpo; struct mlx5e_ipsec *ipsec; struct mlx5_accel_esp_xfrm_attrs attrs; struct mlx5e_ipsec_rule ipsec_rule; @@ -158,6 +162,7 @@ struct mlx5e_ipsec_sa_entry { u32 enc_key_id; u16 kspi; /* Stack allocated unique SA identifier */ struct mlx5e_ipsec_esn_state esn_state; + u16 vid; }; struct upspec { @@ -184,6 +189,7 @@ struct mlx5_accel_pol_xfrm_attrs { u8 dir : 2; u32 reqid; u32 prio; + u16 vid; }; struct 
mlx5e_ipsec_pol_entry { @@ -251,27 +257,22 @@ void mlx5e_accel_ipsec_fs_rx_tables_destroy(struct mlx5e_priv *priv); int mlx5e_accel_ipsec_fs_rx_tables_create(struct mlx5e_priv *priv); void mlx5e_accel_ipsec_fs_rx_catchall_rules_destroy(struct mlx5e_priv *priv); int mlx5e_accel_ipsec_fs_rx_catchall_rules(struct mlx5e_priv *priv); -int mlx5_accel_ipsec_rx_tag_add(if_t ifp, struct mbuf *mb); -int mlx5e_accel_ipsec_handle_rx_cqe(struct mbuf *mb, struct mlx5_cqe64 *cqe); +int mlx5_accel_ipsec_rx_tag_add(if_t ifp, struct mlx5e_rq_mbuf *mr); +void mlx5e_accel_ipsec_handle_rx_cqe(struct mbuf *mb, struct mlx5_cqe64 *cqe, + struct mlx5e_rq_mbuf *mr); + static inline int mlx5e_accel_ipsec_flow(struct mlx5_cqe64 *cqe) { return MLX5_IPSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata)); } -static inline void mlx5e_accel_ipsec_handle_rx(struct mbuf *mb, struct mlx5_cqe64 *cqe) +static inline void +mlx5e_accel_ipsec_handle_rx(struct mbuf *mb, struct mlx5_cqe64 *cqe, + struct mlx5e_rq_mbuf *mr) { u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata); - if (!MLX5_IPSEC_METADATA_MARKER(ipsec_meta_data)) { - struct m_tag *mtag; - - mtag = m_tag_find(mb, PACKET_TAG_IPSEC_ACCEL_IN, NULL); - if (mtag != NULL) - m_tag_delete(mb, mtag); - - return; - } - - mlx5e_accel_ipsec_handle_rx_cqe(mb, cqe); + if (MLX5_IPSEC_METADATA_MARKER(ipsec_meta_data)) + mlx5e_accel_ipsec_handle_rx_cqe(mb, cqe, mr); } #endif /* __MLX5_ACCEL_IPSEC_H__ */ diff --git a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec.c b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec.c index 555847717779..a25ed4c1c51f 100644 --- a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec.c +++ b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec.c @@ -45,6 +45,8 @@ #define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000) +static void mlx5e_if_sa_deinstall_onekey(struct ifnet *ifp, u_int dev_spi, + void *priv); static int mlx5e_if_sa_deinstall(struct ifnet *ifp, u_int dev_spi, void *priv); static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(void *x) @@ -95,8 +97,8 @@ mlx5e_ipsec_handle_counters(struct 
work_struct *_work) bytes += bytes1; #ifdef IPSEC_OFFLOAD - ipsec_accel_drv_sa_lifetime_update(sa_entry->savp, sa_entry->ifp, - sa_entry->kspi, bytes, packets); + ipsec_accel_drv_sa_lifetime_update( + sa_entry->savp, sa_entry->ifpo, sa_entry->kspi, bytes, packets); #endif queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, @@ -319,19 +321,23 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, } static int -mlx5e_if_sa_newkey_onedir(struct ifnet *ifp, void *sav, int dir, - u_int drv_spi, struct mlx5e_ipsec_sa_entry **privp, - struct mlx5e_ipsec_priv_bothdir *pb) +mlx5e_if_sa_newkey_onedir(struct ifnet *ifp, void *sav, int dir, u_int drv_spi, + struct mlx5e_ipsec_sa_entry **privp, struct mlx5e_ipsec_priv_bothdir *pb, + struct ifnet *ifpo) { struct mlx5e_ipsec_sa_entry *sa_entry = NULL; struct mlx5e_priv *priv = if_getsoftc(ifp); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_ipsec *ipsec = priv->ipsec; + u16 vid = VLAN_NONE; int err; if (priv->gone != 0 || ipsec == NULL) return (EOPNOTSUPP); + if (if_gettype(ifpo) == IFT_L2VLAN) + VLAN_TAG(ifpo, &vid); + err = mlx5e_xfrm_validate_state(mdev, sav); if (err) return err; @@ -343,7 +349,9 @@ mlx5e_if_sa_newkey_onedir(struct ifnet *ifp, void *sav, int dir, sa_entry->kspi = drv_spi; sa_entry->savp = sav; sa_entry->ifp = ifp; + sa_entry->ifpo = ifpo; sa_entry->ipsec = ipsec; + sa_entry->vid = vid; mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs, dir); @@ -378,32 +386,48 @@ mlx5e_if_sa_newkey_onedir(struct ifnet *ifp, void *sav, int dir, mlx5_ipsec_free_sa_ctx(sa_entry); err_sa_ctx: kfree(sa_entry->dwork); + sa_entry->dwork = NULL; err_xfrm: kfree(sa_entry); mlx5_en_err(ifp, "Device failed to offload this state"); return err; } +#define GET_TRUNK_IF(vifp, ifp, ept) \ + if (if_gettype(vifp) == IFT_L2VLAN) { \ + NET_EPOCH_ENTER(ept); \ + ifp = VLAN_TRUNKDEV(vifp); \ + NET_EPOCH_EXIT(ept); \ + } else { \ + ifp = vifp; \ + } + static int -mlx5e_if_sa_newkey(struct ifnet *ifp, void *sav, 
u_int dev_spi, void **privp) +mlx5e_if_sa_newkey(struct ifnet *ifpo, void *sav, u_int dev_spi, void **privp) { struct mlx5e_ipsec_priv_bothdir *pb; + struct epoch_tracker et; + struct ifnet *ifp; int error; + GET_TRUNK_IF(ifpo, ifp, et); + pb = malloc(sizeof(struct mlx5e_ipsec_priv_bothdir), M_DEVBUF, M_WAITOK | M_ZERO); - error = mlx5e_if_sa_newkey_onedir(ifp, sav, IPSEC_DIR_INBOUND, - dev_spi, &pb->priv_in, pb); + error = mlx5e_if_sa_newkey_onedir( + ifp, sav, IPSEC_DIR_INBOUND, dev_spi, &pb->priv_in, pb, ifpo); if (error != 0) { free(pb, M_DEVBUF); return (error); } - error = mlx5e_if_sa_newkey_onedir(ifp, sav, IPSEC_DIR_OUTBOUND, - dev_spi, &pb->priv_out, pb); + error = mlx5e_if_sa_newkey_onedir( + ifp, sav, IPSEC_DIR_OUTBOUND, dev_spi, &pb->priv_out, pb, ifpo); if (error == 0) { *privp = pb; } else { - mlx5e_if_sa_deinstall(ifp, dev_spi, pb->priv_in); + if (pb->priv_in->dwork != NULL) + cancel_delayed_work_sync(&pb->priv_in->dwork->dwork); + mlx5e_if_sa_deinstall_onekey(ifp, dev_spi, pb->priv_in); free(pb, M_DEVBUF); } return (error); @@ -426,9 +450,13 @@ mlx5e_if_sa_deinstall_onekey(struct ifnet *ifp, u_int dev_spi, void *priv) } static int -mlx5e_if_sa_deinstall(struct ifnet *ifp, u_int dev_spi, void *priv) +mlx5e_if_sa_deinstall(struct ifnet *ifpo, u_int dev_spi, void *priv) { struct mlx5e_ipsec_priv_bothdir pb, *pbp; + struct epoch_tracker et; + struct ifnet *ifp; + + GET_TRUNK_IF(ifpo, ifp, et); pbp = priv; pb = *(struct mlx5e_ipsec_priv_bothdir *)priv; @@ -457,12 +485,16 @@ mlx5e_if_sa_cnt_one(struct ifnet *ifp, void *sa, uint32_t drv_spi, } static int -mlx5e_if_sa_cnt(struct ifnet *ifp, void *sa, uint32_t drv_spi, - void *priv, struct seclifetime *lt) +mlx5e_if_sa_cnt(struct ifnet *ifpo, void *sa, uint32_t drv_spi, void *priv, + struct seclifetime *lt) { struct mlx5e_ipsec_priv_bothdir *pb; u64 packets_in, packets_out; u64 bytes_in, bytes_out; + struct epoch_tracker et; + struct ifnet *ifp; + + GET_TRUNK_IF(ifpo, ifp, et); pb = priv; 
mlx5e_if_sa_cnt_one(ifp, sa, drv_spi, pb->priv_in, @@ -541,9 +573,9 @@ static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, return 0; } -static void mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, - struct mlx5_accel_pol_xfrm_attrs *attrs, - struct inpcb *inp) +static void +mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, + struct mlx5_accel_pol_xfrm_attrs *attrs, struct inpcb *inp, u16 vid) { struct secpolicy *sp = pol_entry->sp; struct secpolicyindex *spidx = &sp->spidx; @@ -587,15 +619,22 @@ static void mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_ attrs->action = IPSEC_POLICY_IPSEC; } attrs->dir = spidx->dir; + attrs->vid = vid; } -static int mlx5e_if_spd_install(struct ifnet *ifp, void *sp, void *inp1, - void **ifdatap) +static int +mlx5e_if_spd_install(struct ifnet *ifpo, void *sp, void *inp1, void **ifdatap) { struct mlx5e_ipsec_pol_entry *pol_entry; struct mlx5e_priv *priv; + struct epoch_tracker et; + u16 vid = VLAN_NONE; + struct ifnet *ifp; int err; + GET_TRUNK_IF(ifpo, ifp, et); + if (if_gettype(ifpo) == IFT_L2VLAN) + VLAN_TAG(ifpo, &vid); priv = if_getsoftc(ifp); if (priv->gone || !priv->ipsec) return (EOPNOTSUPP); @@ -611,7 +650,8 @@ static int mlx5e_if_spd_install(struct ifnet *ifp, void *sp, void *inp1, pol_entry->sp = sp; pol_entry->ipsec = priv->ipsec; - mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs, inp1); + mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs, + inp1, vid); err = mlx5e_accel_ipsec_fs_add_pol(pol_entry); if (err) goto err_pol; @@ -625,11 +665,12 @@ static int mlx5e_if_spd_install(struct ifnet *ifp, void *sp, void *inp1, return err; } - -static int mlx5e_if_spd_deinstall(struct ifnet *ifp, void *sp, void *ifdata) +static int +mlx5e_if_spd_deinstall(struct ifnet *ifpo, void *sp, void *ifdata) { - struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(ifdata); + struct mlx5e_ipsec_pol_entry *pol_entry; + 
pol_entry = to_ipsec_pol_entry(ifdata); mlx5e_accel_ipsec_fs_del_pol(pol_entry); kfree(pol_entry); return 0; @@ -649,9 +690,17 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) } static int -mlx5e_if_ipsec_hwassist(if_t ifnet, void *sav __unused, +mlx5e_if_ipsec_hwassist(if_t ifneto, void *sav __unused, uint32_t drv_spi __unused, void *priv __unused) { + if_t ifnet; + + if (if_gettype(ifneto) == IFT_L2VLAN) { + ifnet = VLAN_TRUNKDEV(ifneto); + } else { + ifnet = ifneto; + } + return (if_gethwassist(ifnet) & (CSUM_TSO | CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_IP6_TSO | CSUM_IP6_TCP | CSUM_IP6_UDP)); } diff --git a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c index a6a0398f9dca..e348ab1992a5 100644 --- a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c +++ b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c @@ -185,6 +185,44 @@ static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi, bool encap) } } +static void +setup_fte_vid(struct mlx5_flow_spec *spec, u16 vid) +{ + /* virtual lan tag */ + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, + outer_headers.cvlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); +} + +static void +clear_fte_vid(struct mlx5_flow_spec *spec) +{ + MLX5_SET(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag, 0); + MLX5_SET(fte_match_param, spec->match_value, + outer_headers.cvlan_tag, 0); + MLX5_SET(fte_match_param, spec->match_criteria, + outer_headers.first_vid, 0); + MLX5_SET(fte_match_param, spec->match_value, + outer_headers.first_vid, 0); +} + +static void +setup_fte_no_vid(struct mlx5_flow_spec *spec) +{ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, 
spec->match_value, + outer_headers.cvlan_tag, 0); +} + static struct mlx5_fs_chains * ipsec_chains_create(struct mlx5_core_dev *mdev, struct mlx5_flow_table *miss_ft, enum mlx5_flow_namespace_type ns, int base_prio, @@ -474,17 +512,6 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) if (!spec) return -ENOMEM; - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); - else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); - - if (!attrs->encap) - setup_fte_esp(spec); - - setup_fte_spi(spec, attrs->spi, attrs->encap); - setup_fte_no_frags(spec); - if (!attrs->drop) { err = setup_modify_header(mdev, sa_entry->kspi | BIT(31), IPSEC_DIR_INBOUND, &flow_act); @@ -520,15 +547,46 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[1].counter_id = mlx5_fc_id(counter); + if (attrs->family == AF_INET) + setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + else + setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + + if (!attrs->encap) + setup_fte_esp(spec); + + setup_fte_spi(spec, attrs->spi, attrs->encap); + setup_fte_no_frags(spec); + + if (sa_entry->vid != VLAN_NONE) + setup_fte_vid(spec, sa_entry->vid); + else + setup_fte_no_vid(spec); + rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, dest, 2); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err); goto err_add_flow; } + ipsec_rule->rule = rule; + + /* Add another rule for zero vid */ + if (sa_entry->vid == VLAN_NONE) { + clear_fte_vid(spec); + setup_fte_vid(spec, 0); + rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "fail to add RX ipsec zero vid rule err=%d\n", + err); + goto err_add_flow; + } + ipsec_rule->vid_zero_rule = rule; + } kvfree(spec); - ipsec_rule->rule = rule; ipsec_rule->fc = counter; ipsec_rule->modify_hdr = 
flow_act.modify_hdr; ipsec_rule->pkt_reformat = flow_act.pkt_reformat; @@ -536,10 +594,12 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) err_add_flow: mlx5_fc_destroy(mdev, counter); + if (ipsec_rule->rule != NULL) + mlx5_del_flow_rules(&ipsec_rule->rule); err_add_cnt: mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat); err_pkt_reformat: - if (flow_act.modify_hdr) + if (flow_act.modify_hdr != NULL) mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr); err_mod_header: kvfree(spec); @@ -1222,8 +1282,6 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) switch (attrs->action) { case IPSEC_POLICY_IPSEC: flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - /*if (!attrs->reqid) - break;*/ err = setup_modify_header(mdev, attrs->reqid, IPSEC_DIR_OUTBOUND, &flow_act); if (err) @@ -1278,7 +1336,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft; struct mlx5e_ipsec_rx *rx; - int err, dstn = 0; + int err, dstn = 0; rx = (attrs->family == AF_INET) ? ipsec->rx_ipv4 : ipsec->rx_ipv6; ft = rx->chains ? 
ipsec_chains_get_table(rx->chains, attrs->prio) : rx->ft.pol; @@ -1291,14 +1349,6 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) goto err_alloc; } - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); - else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); - - setup_fte_no_frags(spec); - setup_fte_upper_proto_match(spec, &attrs->upspec); - switch (attrs->action) { case IPSEC_POLICY_IPSEC: flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; @@ -1318,21 +1368,52 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[dstn].ft = rx->ft.sa; dstn++; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - mlx5_core_err(mdev, "Fail to add RX IPsec policy rule err=%d\n", err); - goto err_action; - } - kvfree(spec); - pol_entry->ipsec_rule.rule = rule; + if (attrs->family == AF_INET) + setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + else + setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + + setup_fte_no_frags(spec); + setup_fte_upper_proto_match(spec, &attrs->upspec); + if (attrs->vid != VLAN_NONE) + setup_fte_vid(spec, attrs->vid); + else + setup_fte_no_vid(spec); + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add RX IPsec policy rule err=%d\n", err); + goto err_action; + } + pol_entry->ipsec_rule.rule = rule; + + /* Add also rule for zero vid */ + if (attrs->vid == VLAN_NONE) { + clear_fte_vid(spec); + setup_fte_vid(spec, 0); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add RX IPsec policy rule err=%d\n", + err); + goto err_action; + } + pol_entry->ipsec_rule.vid_zero_rule = rule; + } + + kvfree(spec); return 0; err_action: - kvfree(spec); + if 
(pol_entry->ipsec_rule.rule != NULL) + mlx5_del_flow_rules(&pol_entry->ipsec_rule.rule); + kvfree(spec); err_alloc: - if (rx->chains) + if (rx->chains != NULL) ipsec_chains_put_table(rx->chains, attrs->prio); return err; } @@ -1854,7 +1935,9 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) mlx5_del_flow_rules(&ipsec_rule->rule); mlx5_del_flow_rules(&ipsec_rule->kspi_rule); - if (ipsec_rule->reqid_rule) + if (ipsec_rule->vid_zero_rule != NULL) + mlx5_del_flow_rules(&ipsec_rule->vid_zero_rule); + if (ipsec_rule->reqid_rule != NULL) mlx5_del_flow_rules(&ipsec_rule->reqid_rule); mlx5_fc_destroy(mdev, ipsec_rule->fc); mlx5_packet_reformat_dealloc(mdev, ipsec_rule->pkt_reformat); @@ -1863,7 +1946,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) return; } - if (ipsec_rule->modify_hdr) + if (ipsec_rule->modify_hdr != NULL) mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr); } @@ -1881,6 +1964,8 @@ void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry) struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry); mlx5_del_flow_rules(&ipsec_rule->rule); + if (ipsec_rule->vid_zero_rule != NULL) + mlx5_del_flow_rules(&ipsec_rule->vid_zero_rule); if (pol_entry->attrs.dir == IPSEC_DIR_INBOUND) { struct mlx5e_ipsec_rx *rx; diff --git a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c index 5ff8e021b196..0883cfb2d510 100644 --- a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c +++ b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c @@ -35,36 +35,41 @@ #define MLX5_IPSEC_METADATA_HANDLE(ipsec_metadata) (ipsec_metadata & 0xFFFFFF) -int mlx5_accel_ipsec_rx_tag_add(if_t ifp, struct mbuf *mb) +int +mlx5_accel_ipsec_rx_tag_add(if_t ifp, struct mlx5e_rq_mbuf *mr) { struct mlx5e_priv *priv; - struct ipsec_accel_in_tag *tag; - struct m_tag *mtag; + struct ipsec_accel_in_tag *mtag; priv = if_getsoftc(ifp); if (priv->ipsec == NULL) return (0); + if (mr->ipsec_mtag != NULL) + return (0); 
- mtag = m_tag_get(PACKET_TAG_IPSEC_ACCEL_IN, sizeof(*tag), M_NOWAIT); + mtag = (struct ipsec_accel_in_tag *)m_tag_get( + PACKET_TAG_IPSEC_ACCEL_IN, sizeof(*mtag), M_NOWAIT); if (mtag == NULL) - return -ENOMEM; - - m_tag_prepend(mb, mtag); - return 0; + return (-ENOMEM); + mr->ipsec_mtag = mtag; + return (0); } -int mlx5e_accel_ipsec_handle_rx_cqe(struct mbuf *mb, struct mlx5_cqe64 *cqe) +void +mlx5e_accel_ipsec_handle_rx_cqe(struct mbuf *mb, struct mlx5_cqe64 *cqe, + struct mlx5e_rq_mbuf *mr) { - struct ipsec_accel_in_tag *tag; - u32 drv_spi; + struct ipsec_accel_in_tag *mtag; + u32 drv_spi; drv_spi = MLX5_IPSEC_METADATA_HANDLE(be32_to_cpu(cqe->ft_metadata)); - tag = (struct ipsec_accel_in_tag *) m_tag_find(mb, PACKET_TAG_IPSEC_ACCEL_IN, NULL); - WARN_ON(tag == NULL); - if (tag) - tag->drv_spi = drv_spi; - - return 0; + mtag = mr->ipsec_mtag; + WARN_ON(mtag == NULL); + mr->ipsec_mtag = NULL; + if (mtag != NULL) { + mtag->drv_spi = drv_spi; + m_tag_prepend(mb, &mtag->tag); + } } void diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h index cdc8caa838d6..80e0b7fbdedb 100644 --- a/sys/dev/mlx5/mlx5_en/en.h +++ b/sys/dev/mlx5/mlx5_en/en.h @@ -70,7 +70,6 @@ #include #include #include -#include #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) @@ -748,10 +747,13 @@ struct mlx5e_cq { struct mlx5_wq_ctrl wq_ctrl; } __aligned(MLX5E_CACHELINE_SIZE); +struct ipsec_accel_in_tag; + struct mlx5e_rq_mbuf { bus_dmamap_t dma_map; caddr_t data; struct mbuf *mbuf; + struct ipsec_accel_in_tag *ipsec_mtag; }; struct mlx5e_rq { diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c index 1601557e52cc..ac275b5b145c 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c @@ -27,6 +27,7 @@ #include "opt_ratelimit.h" #include +#include #include #include diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index 
b2b6b24158b2..953fbabfe9c4 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -30,6 +30,7 @@ #include "opt_ratelimit.h" #include +#include #include #include @@ -1324,6 +1325,8 @@ mlx5e_destroy_rq(struct mlx5e_rq *rq) wq_sz = mlx5_wq_ll_get_size(&rq->wq); for (i = 0; i != wq_sz; i++) { if (rq->mbuf[i].mbuf != NULL) { + if (rq->mbuf[i].ipsec_mtag != NULL) + m_tag_free(&rq->mbuf[i].ipsec_mtag->tag); bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map); m_freem(rq->mbuf[i].mbuf); } diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c index 3d4b75884354..a24bbe3d193e 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c @@ -70,7 +70,7 @@ mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, /* get IP header aligned */ m_adj(mb, MLX5E_NET_IP_ALIGN); - err = mlx5_accel_ipsec_rx_tag_add(rq->ifp, mb); + err = mlx5_accel_ipsec_rx_tag_add(rq->ifp, &rq->mbuf[ix]); if (err) goto err_free_mbuf; err = -bus_dmamap_load_mbuf_sg(rq->dma_tag, rq->mbuf[ix].dma_map, @@ -277,9 +277,8 @@ mlx5e_mbuf_tstmp(struct mlx5e_priv *priv, uint64_t hw_tstmp) } static inline void -mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, - struct mlx5e_rq *rq, struct mbuf *mb, - u32 cqe_bcnt) +mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, + struct mbuf *mb, struct mlx5e_rq_mbuf *mr, u32 cqe_bcnt) { if_t ifp = rq->ifp; struct mlx5e_channel *c; @@ -423,7 +422,7 @@ mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, break; } - mlx5e_accel_ipsec_handle_rx(mb, cqe); + mlx5e_accel_ipsec_handle_rx(mb, cqe, mr); } static inline void @@ -588,7 +587,8 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget) rq->mbuf[wqe_counter].dma_map); } rx_common: - mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt); + mlx5e_build_rx_mbuf(cqe, rq, mb, &rq->mbuf[wqe_counter], + byte_cnt); rq->stats.bytes += byte_cnt; rq->stats.packets++; #ifdef NUMA diff --git a/sys/dev/xen/gntdev/gntdev.c b/sys/dev/xen/gntdev/gntdev.c index 
4530feb1c76d..49f8aefad62e 100644 --- a/sys/dev/xen/gntdev/gntdev.c +++ b/sys/dev/xen/gntdev/gntdev.c @@ -600,7 +600,7 @@ notify_unmap_cleanup(struct gntdev_gmap *gmap) continue; if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) goto retry; - cdev_pager_free_page(gmap->map->mem, m); + cdev_mgtdev_pager_free_page(gmap->map->mem, m); } VM_OBJECT_WUNLOCK(gmap->map->mem); diff --git a/sys/dev/xen/privcmd/privcmd.c b/sys/dev/xen/privcmd/privcmd.c index 02e268b23d42..c04ac287183b 100644 --- a/sys/dev/xen/privcmd/privcmd.c +++ b/sys/dev/xen/privcmd/privcmd.c @@ -135,7 +135,7 @@ privcmd_pg_dtor(void *handle) continue; if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) goto retry; - cdev_pager_free_page(map->mem, m); + cdev_mgtdev_pager_free_page(map->mem, m); } VM_OBJECT_WUNLOCK(map->mem); diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 7744718184b7..2ff2fd52ca7d 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -610,8 +610,18 @@ nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, break; case ND_NFSV4: NFSZERO_ATTRBIT(&attrbits); - if (vap->va_mode != (mode_t)VNOVAL) - NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE); + np = NULL; + if (strcmp(vp->v_mount->mnt_vfc->vfc_name, "nfs") == 0) + np = VTONFS(vp); + if (vap->va_mode != (mode_t)VNOVAL) { + if ((flags & NFSSATTR_NEWFILE) != 0 && np != NULL && + NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, + NFSATTRBIT_MODEUMASK)) + NFSSETBIT_ATTRBIT(&attrbits, + NFSATTRBIT_MODEUMASK); + else + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE); + } if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) @@ -622,18 +632,14 @@ nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); if (vap->va_mtime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET); - if 
(vap->va_birthtime.tv_sec != VNOVAL && - strcmp(vp->v_mount->mnt_vfc->vfc_name, "nfs") == 0) { - /* - * We can only test for support of TimeCreate if - * the "vp" argument is for an NFS vnode. - */ - np = VTONFS(vp); - if (NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, - NFSATTRBIT_TIMECREATE)) - NFSSETBIT_ATTRBIT(&attrbits, - NFSATTRBIT_TIMECREATE); - } + /* + * We can only test for support of TimeCreate if + * the "vp" argument is for an NFS vnode. + */ + if (vap->va_birthtime.tv_sec != VNOVAL && np != NULL && + NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, + NFSATTRBIT_TIMECREATE)) + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE); (void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL); break; @@ -3108,6 +3114,18 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; + case NFSATTRBIT_MODEUMASK: + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); + /* + * Since FreeBSD applies the umask above the VFS/VOP, + * there is no umask to handle here. If FreeBSD + * moves handling of umask to below the VFS/VOP, + * this could change. + */ + *tl++ = vtonfsv34_mode(vap->va_mode); + *tl = 0; + retnum += 2 * NFSX_UNSIGNED; + break; default: printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos); } diff --git a/sys/fs/nfs/nfscl.h b/sys/fs/nfs/nfscl.h index a52b9e433145..3b1445e1923c 100644 --- a/sys/fs/nfs/nfscl.h +++ b/sys/fs/nfs/nfscl.h @@ -68,10 +68,11 @@ struct nfsv4node { * These flag bits are used for the argument to nfscl_fillsattr() to * indicate special handling of the attributes. */ -#define NFSSATTR_FULL 0x1 -#define NFSSATTR_SIZE0 0x2 -#define NFSSATTR_SIZENEG1 0x4 -#define NFSSATTR_SIZERDEV 0x8 +#define NFSSATTR_FULL 0x01 +#define NFSSATTR_SIZE0 0x02 +#define NFSSATTR_SIZENEG1 0x04 +#define NFSSATTR_SIZERDEV 0x08 +#define NFSSATTR_NEWFILE 0x10 /* Use this macro for debug printfs. */ #define NFSCL_DEBUG(level, ...) 
do { \ diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index 0268940fd8a6..ce7acf102d41 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -1183,7 +1183,8 @@ struct nfsv3_sattr { */ #define NFSATTRBIT_SUPPSETONLY1 (NFSATTRBM_TIMEACCESSSET | \ NFSATTRBM_TIMEMODIFYSET) -#define NFSATTRBIT_SUPPSETONLY2 (NFSATTRBM_MODESETMASKED) +#define NFSATTRBIT_SUPPSETONLY2 (NFSATTRBM_MODESETMASKED | \ + NFSATTRBM_MODEUMASK) /* * NFSATTRBIT_SETABLE - SETABLE0 - bits 0<->31 @@ -1197,11 +1198,12 @@ struct nfsv3_sattr { (NFSATTRBM_MODE | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ - NFSATTRBM_TIMECREATE | \ + NFSATTRBM_TIMECREATE | \ NFSATTRBM_TIMEACCESSSET | \ NFSATTRBM_TIMEMODIFYSET) #define NFSATTRBIT_SETABLE2 \ - (NFSATTRBM_MODESETMASKED) + (NFSATTRBM_MODESETMASKED | \ + NFSATTRBM_MODEUMASK) /* * NFSATTRBIT_NFSV41 - Attributes only supported by NFSv4.1. @@ -1218,7 +1220,9 @@ struct nfsv3_sattr { /* * NFSATTRBIT_NFSV42 - Attributes only supported by NFSv4.2. */ -#define NFSATTRBIT_NFSV42_2 NFSATTRBM_XATTRSUPPORT +#define NFSATTRBIT_NFSV42_2 \ + (NFSATTRBM_XATTRSUPPORT | \ + NFSATTRBM_MODEUMASK) /* * Set of attributes that the getattr vnode op needs. diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 043e531d5e2c..16f4e83434df 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -2428,7 +2428,7 @@ nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap, *tl = vtonfsv34_type(vtyp); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) - nfscl_fillsattr(nd, vap, dvp, 0, 0); + nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0); if ((nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); @@ -2650,14 +2650,16 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, if (NFSHASSESSPERSIST(nmp)) { /* Use GUARDED for persistent sessions. 
*/ *tl = txdr_unsigned(NFSCREATE_GUARDED); - nfscl_fillsattr(nd, vap, dvp, 0, 0); + nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, + 0); } else { /* Otherwise, use EXCLUSIVE4_1. */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; - nfscl_fillsattr(nd, vap, dvp, 0, 0); + nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, + 0); } } else { /* NFSv4.0 */ @@ -2668,7 +2670,7 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); - nfscl_fillsattr(nd, vap, dvp, 0, 0); + nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); @@ -3243,7 +3245,7 @@ nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap, *tl = txdr_unsigned(NFDIR); } (void) nfsm_strtom(nd, name, namelen); - nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); + nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1 | NFSSATTR_NEWFILE, 0); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); @@ -8432,18 +8434,18 @@ nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap, if (NFSHASSESSPERSIST(nmp)) { /* Use GUARDED for persistent sessions. */ *tl = txdr_unsigned(NFSCREATE_GUARDED); - nfscl_fillsattr(nd, vap, dvp, 0, 0); + nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0); } else { /* Otherwise, use EXCLUSIVE4_1. 
*/ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; - nfscl_fillsattr(nd, vap, dvp, 0, 0); + nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0); } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); - nfscl_fillsattr(nd, vap, dvp, 0, 0); + nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index daecdf2c3a18..ef63218edf78 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -3207,6 +3207,23 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nd->nd_repstat = moderet; attrsum += 2 * NFSX_UNSIGNED; break; + case NFSATTRBIT_MODEUMASK: + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + mode = fxdr_unsigned(u_short, *tl++); + mask = fxdr_unsigned(u_short, *tl); + /* + * If moderet != 0, mode has already been done. + * If vp != NULL, this is not a file object creation. 
+ */ + if ((nd->nd_flag & ND_NFSV42) == 0) + nd->nd_repstat = NFSERR_ATTRNOTSUPP; + else if ((mask & ~0777) != 0 || vp != NULL || + moderet != 0) + nd->nd_repstat = NFSERR_INVAL; + else + nvap->na_mode = (mode & ~mask); + attrsum += 2 * NFSX_UNSIGNED; + break; default: nd->nd_repstat = NFSERR_ATTRNOTSUPP; /* diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 015799d018b2..9c87be3775ee 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -353,13 +353,13 @@ mi_startup(void) } static void -print_caddr_t(void *data) +print_caddr_t(const void *data) { - printf("%s", (char *)data); + printf("%s", (const char *)data); } static void -print_version(void *data __unused) +print_version(const void *data __unused) { int len; diff --git a/sys/kern/kern_mib.c b/sys/kern/kern_mib.c index e035bd94abb2..f6190b47d3be 100644 --- a/sys/kern/kern_mib.c +++ b/sys/kern/kern_mib.c @@ -484,10 +484,10 @@ SYSCTL_PROC(_kern, KERN_SECURELVL, securelevel, #ifdef INCLUDE_CONFIG_FILE /* Actual kernel configuration options. 
*/ -extern char kernconfstring[]; +extern const char kernconfstring[]; -SYSCTL_STRING(_kern, OID_AUTO, conftxt, CTLFLAG_RD, - kernconfstring, 0, "Kernel configuration file"); +SYSCTL_CONST_STRING(_kern, OID_AUTO, conftxt, CTLFLAG_RD, + kernconfstring, "Kernel configuration file"); #endif static int diff --git a/sys/kern/kern_rangelock.c b/sys/kern/kern_rangelock.c index 911017f6ffb9..f96919ac9e69 100644 --- a/sys/kern/kern_rangelock.c +++ b/sys/kern/kern_rangelock.c @@ -288,6 +288,9 @@ struct rl_q_entry { static uma_zone_t rl_entry_zone; static smr_t rl_smr; +static void rangelock_free_free(struct rl_q_entry *free); +static void rangelock_noncheating_destroy(struct rangelock *lock); + static void rangelock_sys_init(void) { @@ -338,16 +341,10 @@ rangelock_init(struct rangelock *lock) void rangelock_destroy(struct rangelock *lock) { - struct rl_q_entry *e, *ep; - MPASS(!lock->sleepers); - if (rangelock_cheat_destroy(lock)) - return; - for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head); - e != NULL; e = rl_e_unmark(ep)) { - ep = atomic_load_ptr(&e->rl_q_next); - uma_zfree_smr(rl_entry_zone, e); - } + if (!rangelock_cheat_destroy(lock)) + rangelock_noncheating_destroy(lock); + DEBUG_POISON_POINTER(*(void **)&lock->head); } static bool @@ -392,6 +389,26 @@ rl_e_is_rlock(const struct rl_q_entry *e) return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ); } +static void +rangelock_free_free(struct rl_q_entry *free) +{ + struct rl_q_entry *x, *xp; + struct thread *td; + + td = curthread; + for (x = free; x != NULL; x = xp) { + MPASS(!rl_e_is_marked(x)); + xp = x->rl_q_free; + MPASS(!rl_e_is_marked(xp)); + if (td->td_rlqe == NULL) { + smr_synchronize(rl_smr); + td->td_rlqe = x; + } else { + uma_zfree_smr(rl_entry_zone, x); + } + } +} + static void rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e) { @@ -461,10 +478,55 @@ static bool rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old, struct rl_q_entry *new) { + 
MPASS(!rl_e_is_marked(old)); return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old, (uintptr_t)new) != 0); } +static void +rangelock_noncheating_destroy(struct rangelock *lock) +{ + struct rl_q_entry *cur, *free, *next, **prev; + + free = NULL; +again: + smr_enter(rl_smr); + prev = (struct rl_q_entry **)&lock->head; + cur = rl_q_load(prev); + MPASS(!rl_e_is_marked(cur)); + + for (;;) { + if (cur == NULL) + break; + if (rl_e_is_marked(cur)) + goto again; + + next = rl_q_load(&cur->rl_q_next); + if (rl_e_is_marked(next)) { + next = rl_e_unmark(next); + if (rl_q_cas(prev, cur, next)) { +#ifdef INVARIANTS + cur->rl_q_owner = NULL; +#endif + cur->rl_q_free = free; + free = cur; + cur = next; + continue; + } + smr_exit(rl_smr); + goto again; + } + + sleepq_lock(&lock->sleepers); + if (!rl_e_is_marked(cur)) { + rl_insert_sleep(lock); + goto again; + } + } + smr_exit(rl_smr); + rangelock_free_free(free); +} + enum RL_INSERT_RES { RL_TRYLOCK_FAILED, RL_LOCK_SUCCESS, @@ -477,6 +539,7 @@ rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock, { struct rl_q_entry *cur, *next, **prev; +again: prev = &e->rl_q_next; cur = rl_q_load(prev); MPASS(!rl_e_is_marked(cur)); /* nobody can unlock e yet */ @@ -489,9 +552,10 @@ rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock, if (rl_q_cas(prev, cur, next)) { cur->rl_q_free = *free; *free = cur; + cur = next; + continue; } - cur = next; - continue; + goto again; } if (rl_e_is_rlock(cur)) { prev = &cur->rl_q_next; @@ -521,6 +585,7 @@ rl_w_validate(struct rangelock *lock, struct rl_q_entry *e, { struct rl_q_entry *cur, *next, **prev; +again: prev = (struct rl_q_entry **)&lock->head; cur = rl_q_load(prev); MPASS(!rl_e_is_marked(cur)); /* head is not marked */ @@ -531,11 +596,12 @@ rl_w_validate(struct rangelock *lock, struct rl_q_entry *e, if (rl_e_is_marked(next)) { next = rl_e_unmark(next); if (rl_q_cas(prev, cur, next)) { - cur->rl_q_next = *free; + cur->rl_q_free = *free; *free = cur; + 
cur = next; + continue; } - cur = next; - continue; + goto again; } if (cur->rl_q_end <= e->rl_q_start) { prev = &cur->rl_q_next; @@ -543,6 +609,12 @@ rl_w_validate(struct rangelock *lock, struct rl_q_entry *e, continue; } sleepq_lock(&lock->sleepers); + /* Reload after sleepq is locked */ + next = rl_q_load(&cur->rl_q_next); + if (rl_e_is_marked(next)) { + sleepq_release(&lock->sleepers); + goto again; + } rangelock_unlock_int(lock, e); if (trylock) { sleepq_release(&lock->sleepers); @@ -580,12 +652,14 @@ rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock, #endif cur->rl_q_free = *free; *free = cur; + cur = next; + continue; } - cur = next; - continue; + goto again; } } + MPASS(!rl_e_is_marked(cur)); r = rl_e_compare(cur, e); if (r == -1) { prev = &cur->rl_q_next; @@ -623,14 +697,12 @@ static struct rl_q_entry * rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start, vm_ooffset_t end, int locktype) { - struct rl_q_entry *e, *free, *x, *xp; - struct thread *td; + struct rl_q_entry *e, *free; void *cookie; enum RL_INSERT_RES res; if (rangelock_cheat_lock(lock, locktype, trylock, &cookie)) return (cookie); - td = curthread; for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) { free = NULL; e = rlqentry_alloc(start, end, locktype); @@ -643,17 +715,7 @@ rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start, free = e; e = NULL; } - for (x = free; x != NULL; x = xp) { - MPASS(!rl_e_is_marked(x)); - xp = x->rl_q_free; - MPASS(!rl_e_is_marked(xp)); - if (td->td_rlqe == NULL) { - smr_synchronize(rl_smr); - td->td_rlqe = x; - } else { - uma_zfree_smr(rl_entry_zone, x); - } - } + rangelock_free_free(free); } return (e); } diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 7236749c7e56..c1fc222a3c84 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1657,8 +1657,8 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, * counts if EINTR/ERESTART are returned. 
Data and control buffers are freed * on return. */ -int -sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, +static int +sosend_generic_locked(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; @@ -1674,6 +1674,9 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, tls = NULL; tls_rtype = TLS_RLTYPE_APP; #endif + + SOCK_IO_SEND_ASSERT_LOCKED(so); + if (uio != NULL) resid = uio->uio_resid; else if ((top->m_flags & M_PKTHDR) != 0) @@ -1703,10 +1706,6 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, if (control != NULL) clen = control->m_len; - error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags)); - if (error) - goto out; - #ifdef KERN_TLS tls_send_flag = 0; tls = ktls_hold(so->so_snd.sb_tls_info); @@ -1729,7 +1728,7 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, if (resid == 0 && !ktls_permit_empty_frames(tls)) { error = EINVAL; - goto release; + goto out; } } #endif @@ -1740,13 +1739,13 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; - goto release; + goto out; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); - goto release; + goto out; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* @@ -1760,7 +1759,7 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, if (!(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; - goto release; + goto out; } } else if (addr == NULL) { SOCKBUF_UNLOCK(&so->so_snd); @@ -1768,7 +1767,7 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, error = ENOTCONN; else error = EDESTADDRREQ; - goto release; + goto out; } } space = sbspace(&so->so_snd); @@ -1778,7 +1777,7 @@ sosend_generic(struct socket *so, struct sockaddr *addr, 
struct uio *uio, clen > so->so_snd.sb_hiwat) { SOCKBUF_UNLOCK(&so->so_snd); error = EMSGSIZE; - goto release; + goto out; } if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { @@ -1786,12 +1785,12 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) { SOCKBUF_UNLOCK(&so->so_snd); error = EWOULDBLOCK; - goto release; + goto out; } error = sbwait(so, SO_SND); SOCKBUF_UNLOCK(&so->so_snd); if (error) - goto release; + goto out; goto restart; } SOCKBUF_UNLOCK(&so->so_snd); @@ -1836,7 +1835,7 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, ((flags & MSG_EOR) ? M_EOR : 0)); if (top == NULL) { error = EFAULT; /* only possible error */ - goto release; + goto out; } space -= resid - uio->uio_resid; resid = uio->uio_resid; @@ -1900,12 +1899,10 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, control = NULL; top = NULL; if (error) - goto release; + goto out; } while (resid && space > 0); } while (resid); -release: - SOCK_IO_SEND_UNLOCK(so); out: #ifdef KERN_TLS if (tls != NULL) @@ -1918,6 +1915,20 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, return (error); } +int +sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags, struct thread *td) +{ + int error; + + error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags)); + if (error) + return (error); + error = sosend_generic_locked(so, addr, uio, top, control, flags, td); + SOCK_IO_SEND_UNLOCK(so); + return (error); +} + /* * Send to a socket from a kernel thread. * @@ -2071,11 +2082,11 @@ sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) * mbuf **mp0 for use in returning the chain. The uio is then used only for * the count in uio_resid. 
*/ -int -soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, - struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +static int +soreceive_generic_locked(struct socket *so, struct sockaddr **psa, + struct uio *uio, struct mbuf **mp, struct mbuf **controlp, int *flagsp) { - struct mbuf *m, **mp; + struct mbuf *m; int flags, error, offset; ssize_t len; struct protosw *pr = so->so_proto; @@ -2084,25 +2095,15 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, ssize_t orig_resid = uio->uio_resid; bool report_real_len = false; - mp = mp0; - if (psa != NULL) - *psa = NULL; - if (controlp != NULL) - *controlp = NULL; + SOCK_IO_RECV_ASSERT_LOCKED(so); + + error = 0; if (flagsp != NULL) { report_real_len = *flagsp & MSG_TRUNC; *flagsp &= ~MSG_TRUNC; flags = *flagsp &~ MSG_EOR; } else flags = 0; - if (flags & MSG_OOB) - return (soreceive_rcvoob(so, uio, flags)); - if (mp != NULL) - *mp = NULL; - - error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags)); - if (error) - return (error); restart: SOCKBUF_LOCK(&so->so_rcv); @@ -2560,72 +2561,56 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, if (flagsp != NULL) *flagsp |= flags; release: - SOCK_IO_RECV_UNLOCK(so); return (error); } -/* - * Optimized version of soreceive() for stream (TCP) sockets. - */ int -soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, - struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, + struct mbuf **mp, struct mbuf **controlp, int *flagsp) { - int len = 0, error = 0, flags, oresid; - struct sockbuf *sb; - struct mbuf *m, *n = NULL; + int error, flags; - /* We only do stream sockets. 
*/ - if (so->so_type != SOCK_STREAM) - return (EINVAL); if (psa != NULL) *psa = NULL; - if (flagsp != NULL) - flags = *flagsp &~ MSG_EOR; - else - flags = 0; if (controlp != NULL) *controlp = NULL; - if (flags & MSG_OOB) - return (soreceive_rcvoob(so, uio, flags)); - if (mp0 != NULL) - *mp0 = NULL; - - sb = &so->so_rcv; - -#ifdef KERN_TLS - /* - * KTLS store TLS records as records with a control message to - * describe the framing. - * - * We check once here before acquiring locks to optimize the - * common case. - */ - if (sb->sb_tls_info != NULL) - return (soreceive_generic(so, psa, uio, mp0, controlp, - flagsp)); -#endif + if (flagsp != NULL) { + flags = *flagsp; + if ((flags & MSG_OOB) != 0) + return (soreceive_rcvoob(so, uio, flags)); + } else { + flags = 0; + } + if (mp != NULL) + *mp = NULL; - /* Prevent other readers from entering the socket. */ error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags)); if (error) return (error); -#ifdef KERN_TLS - if (__predict_false(sb->sb_tls_info != NULL)) { - SOCK_IO_RECV_UNLOCK(so); - return (soreceive_generic(so, psa, uio, mp0, controlp, - flagsp)); - } -#endif + error = soreceive_generic_locked(so, psa, uio, mp, controlp, flagsp); + SOCK_IO_RECV_UNLOCK(so); + return (error); +} + +/* + * Optimized version of soreceive() for stream (TCP) sockets. + */ +static int +soreceive_stream_locked(struct socket *so, struct sockbuf *sb, + struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, + struct mbuf **controlp, int flags) +{ + int len = 0, error = 0, oresid; + struct mbuf *m, *n = NULL; + + SOCK_IO_RECV_ASSERT_LOCKED(so); - SOCKBUF_LOCK(sb); /* Easy one, no space to copyout anything. */ - if (uio->uio_resid == 0) { - error = EINVAL; - goto out; - } + if (uio->uio_resid == 0) + return (EINVAL); oresid = uio->uio_resid; + SOCKBUF_LOCK(sb); /* We will never ever get anything unless we are or were connected. 
*/ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { error = ENOTCONN; @@ -2779,6 +2764,62 @@ soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); SOCKBUF_UNLOCK(sb); + return (error); +} + +int +soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, + struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +{ + struct sockbuf *sb; + int error, flags; + + sb = &so->so_rcv; + + /* We only do stream sockets. */ + if (so->so_type != SOCK_STREAM) + return (EINVAL); + if (psa != NULL) + *psa = NULL; + if (flagsp != NULL) + flags = *flagsp & ~MSG_EOR; + else + flags = 0; + if (controlp != NULL) + *controlp = NULL; + if (flags & MSG_OOB) + return (soreceive_rcvoob(so, uio, flags)); + if (mp0 != NULL) + *mp0 = NULL; + +#ifdef KERN_TLS + /* + * KTLS store TLS records as records with a control message to + * describe the framing. + * + * We check once here before acquiring locks to optimize the + * common case. + */ + if (sb->sb_tls_info != NULL) + return (soreceive_generic(so, psa, uio, mp0, controlp, + flagsp)); +#endif + + /* + * Prevent other threads from reading from the socket. This lock may be + * dropped in order to sleep waiting for data to arrive. 
+ */ + error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags)); + if (error) + return (error); +#ifdef KERN_TLS + if (__predict_false(sb->sb_tls_info != NULL)) { + SOCK_IO_RECV_UNLOCK(so); + return (soreceive_generic(so, psa, uio, mp0, controlp, + flagsp)); + } +#endif + error = soreceive_stream_locked(so, sb, psa, uio, mp0, controlp, flags); SOCK_IO_RECV_UNLOCK(so); return (error); } diff --git a/sys/modules/linux64/Makefile b/sys/modules/linux64/Makefile index d558319f3ba2..b23891a65a4f 100644 --- a/sys/modules/linux64/Makefile +++ b/sys/modules/linux64/Makefile @@ -89,8 +89,7 @@ linux_support.o: linux_support.S assym.inc linux_assym.h ${.ALLSRC:M*.S:u} -o ${.TARGET} linux_genassym.o: offset.inc - ${CC} -c ${CFLAGS:N-flto*:N-fno-common:N-fsanitize*:N-fno-sanitize*} \ - -fcommon ${.IMPSRC} + ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon ${.IMPSRC} .if !defined(KERNBUILDDIR) .warning Building Linuxulator outside of a kernel does not make sense diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile index 88586a39bf96..0604a34690e5 100644 --- a/sys/modules/vmm/Makefile +++ b/sys/modules/vmm/Makefile @@ -28,7 +28,10 @@ DPSRCS+= assym.inc SRCS+= vmm_arm64.c \ vmm_reset.c \ vmm_call.S \ + vmm_handlers.c \ vmm_mmu.c \ + vmm_vhe_exception.S \ + vmm_vhe.c \ vmm_hyp_el2.S .PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io @@ -38,20 +41,20 @@ SRCS+= vgic.c \ vgic_v3.c \ vtimer.c -CLEANFILES+= vmm_hyp_exception.o vmm_hyp.o +CLEANFILES+= vmm_nvhe_exception.o vmm_nvhe.o + CLEANFILES+= vmm_hyp_blob.elf.full CLEANFILES+= vmm_hyp_blob.elf vmm_hyp_blob.bin -vmm_hyp_exception.o: vmm_hyp_exception.S +vmm_nvhe_exception.o: vmm_nvhe_exception.S ${CC} -c -x assembler-with-cpp -DLOCORE \ - ${CFLAGS:N-fsanitize*:N-fno-sanitize*:N-mbranch-protection*} \ - ${.IMPSRC} -o ${.TARGET} -fpie + ${NOSAN_CFLAGS:N-mbranch-protection*} ${.IMPSRC} -o ${.TARGET} -fpie -vmm_hyp.o: vmm_hyp.c - ${CC} -c ${CFLAGS:N-fsanitize*:N-fno-sanitize*:N-mbranch-protection*} \ - ${.IMPSRC} -o ${.TARGET} -fpie 
+vmm_nvhe.o: vmm_nvhe.c + ${CC} -c ${NOSAN_CFLAGS:N-mbranch-protection*} ${.IMPSRC} \ + -o ${.TARGET} -fpie -vmm_hyp_blob.elf.full: vmm_hyp_exception.o vmm_hyp.o +vmm_hyp_blob.elf.full: vmm_nvhe_exception.o vmm_nvhe.o ${LD} -m ${LD_EMULATION} -Bdynamic -L ${SYSDIR}/conf -T ${SYSDIR}/conf/ldscript.arm64 \ ${_LDFLAGS:N-zbti-report*} --no-warn-mismatch --warn-common --export-dynamic \ --dynamic-linker /red/herring -X -o ${.TARGET} ${.ALLSRC} \ @@ -135,14 +138,12 @@ svm_support.o: ${.IMPSRC} -o ${.TARGET} hyp_genassym.o: offset.inc - ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} + ${CC} -c ${NOSAN_CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} vmx_genassym.o: offset.inc - ${CC} -c ${CFLAGS:N-flto*:N-fno-common:N-fsanitize*:N-fno-sanitize*} \ - -fcommon ${.IMPSRC} + ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon ${.IMPSRC} svm_genassym.o: offset.inc - ${CC} -c ${CFLAGS:N-flto*:N-fno-common:N-fsanitize*:N-fno-sanitize*} \ - -fcommon ${.IMPSRC} + ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon ${.IMPSRC} .include diff --git a/sys/net/dummymbuf.c b/sys/net/dummymbuf.c index cb92889c5b77..8c46421888ed 100644 --- a/sys/net/dummymbuf.c +++ b/sys/net/dummymbuf.c @@ -117,11 +117,15 @@ SYSCTL_PROC(_net_dummymbuf, OID_AUTO, hits, * pfil(9) context */ +#ifdef INET VNET_DEFINE_STATIC(pfil_hook_t, dmb_pfil_inet_hook); #define V_dmb_pfil_inet_hook VNET(dmb_pfil_inet_hook) +#endif +#ifdef INET6 VNET_DEFINE_STATIC(pfil_hook_t, dmb_pfil_inet6_hook); #define V_dmb_pfil_inet6_hook VNET(dmb_pfil_inet6_hook) +#endif VNET_DEFINE_STATIC(pfil_hook_t, dmb_pfil_ethernet_hook); #define V_dmb_pfil_ethernet_hook VNET(dmb_pfil_ethernet_hook) @@ -321,6 +325,7 @@ dmb_pfil_mbuf_chk(int pfil_type, struct mbuf **mp, struct ifnet *ifp, return (PFIL_PASS); } +#ifdef INET static pfil_return_t dmb_pfil_inet_mbuf_chk(struct mbuf **mp, struct ifnet *ifp, int flags, void *ruleset, struct inpcb *inp) @@ -328,7 +333,9 @@ dmb_pfil_inet_mbuf_chk(struct mbuf **mp, struct ifnet *ifp, int 
flags, return (dmb_pfil_mbuf_chk(PFIL_TYPE_IP4, mp, ifp, flags, ruleset, inp)); } +#endif +#ifdef INET6 static pfil_return_t dmb_pfil_inet6_mbuf_chk(struct mbuf **mp, struct ifnet *ifp, int flags, void *ruleset, struct inpcb *inp) @@ -336,6 +343,7 @@ dmb_pfil_inet6_mbuf_chk(struct mbuf **mp, struct ifnet *ifp, int flags, return (dmb_pfil_mbuf_chk(PFIL_TYPE_IP6, mp, ifp, flags, ruleset, inp)); } +#endif static pfil_return_t dmb_pfil_ethernet_mbuf_chk(struct mbuf **mp, struct ifnet *ifp, int flags, diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index f64da6c50db2..ae3bc7079ecf 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -18,7 +18,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 8fd0af1fa449..01be7ab2d13b 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -42,9 +42,9 @@ * use by the real outgoing interface, and ask it to send them. 
*/ -#include #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_ipsec.h" #include "opt_kern_tls.h" #include "opt_vlan.h" #include "opt_ratelimit.h" @@ -185,6 +185,7 @@ struct ifvlan { void *ifv_cookie; int ifv_pflags; /* special flags we have set on parent */ int ifv_capenable; + int ifv_capenable2; int ifv_encaplen; /* encapsulation length */ int ifv_mtufudge; /* MTU fudged by this much */ int ifv_mintu; /* min transmission unit */ @@ -1751,6 +1752,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, ifv->ifv_mintu = ETHERMIN; ifv->ifv_pflags = 0; ifv->ifv_capenable = -1; + ifv->ifv_capenable2 = -1; /* * If the parent supports the VLAN_MTU capability, @@ -2009,13 +2011,90 @@ vlan_link_state(struct ifnet *ifp) NET_EPOCH_EXIT(et); } +#ifdef IPSEC_OFFLOAD +#define VLAN_IPSEC_METHOD(exp) \ + if_t p; \ + struct ifvlan *ifv; \ + int error; \ + \ + ifv = ifp->if_softc; \ + VLAN_SLOCK(); \ + if (TRUNK(ifv) != NULL) { \ + p = PARENT(ifv); \ + if_ref(p); \ + error = p->if_ipsec_accel_m->exp; \ + if_rele(p); \ + } else { \ + error = ENXIO; \ + } \ + VLAN_SUNLOCK(); \ + return (error); + + +static int +vlan_if_spdadd(if_t ifp, void *sp, void *inp, void **priv) +{ + VLAN_IPSEC_METHOD(if_spdadd(ifp, sp, inp, priv)); +} + +static int +vlan_if_spddel(if_t ifp, void *sp, void *priv) +{ + VLAN_IPSEC_METHOD(if_spddel(ifp, sp, priv)); +} + +static int +vlan_if_sa_newkey(if_t ifp, void *sav, u_int drv_spi, void **privp) +{ + VLAN_IPSEC_METHOD(if_sa_newkey(ifp, sav, drv_spi, privp)); +} + +static int +vlan_if_sa_deinstall(if_t ifp, u_int drv_spi, void *priv) +{ + VLAN_IPSEC_METHOD(if_sa_deinstall(ifp, drv_spi, priv)); +} + +static int +vlan_if_sa_cnt(if_t ifp, void *sa, uint32_t drv_spi, void *priv, + struct seclifetime *lt) +{ + VLAN_IPSEC_METHOD(if_sa_cnt(ifp, sa, drv_spi, priv, lt)); +} + +static int +vlan_if_ipsec_hwassist(if_t ifp, void *sav, u_int drv_spi,void *priv) +{ + if_t trunk; + + NET_EPOCH_ASSERT(); + trunk = vlan_trunkdev(ifp); + if (trunk == 
NULL) + return (0); + return (trunk->if_ipsec_accel_m->if_hwassist(trunk, sav, + drv_spi, priv)); +} + +static const struct if_ipsec_accel_methods vlan_if_ipsec_accel_methods = { + .if_spdadd = vlan_if_spdadd, + .if_spddel = vlan_if_spddel, + .if_sa_newkey = vlan_if_sa_newkey, + .if_sa_deinstall = vlan_if_sa_deinstall, + .if_sa_cnt = vlan_if_sa_cnt, + .if_hwassist = vlan_if_ipsec_hwassist, +}; + +#undef VLAN_IPSEC_METHOD +#endif /* IPSEC_OFFLOAD */ + static void vlan_capabilities(struct ifvlan *ifv) { struct ifnet *p; struct ifnet *ifp; struct ifnet_hw_tsomax hw_tsomax; - int cap = 0, ena = 0, mena; + int cap = 0, ena = 0, mena, cap2 = 0, ena2 = 0; + int mena2 __unused; u_long hwa = 0; NET_EPOCH_ASSERT(); @@ -2026,6 +2105,7 @@ vlan_capabilities(struct ifvlan *ifv) /* Mask parent interface enabled capabilities disabled by user. */ mena = p->if_capenable & ifv->ifv_capenable; + mena2 = p->if_capenable2 & ifv->ifv_capenable2; /* * If the parent interface can do checksum offloading @@ -2131,6 +2211,15 @@ vlan_capabilities(struct ifvlan *ifv) ifp->if_capabilities = cap; ifp->if_capenable = ena; ifp->if_hwassist = hwa; + +#ifdef IPSEC_OFFLOAD + cap2 |= p->if_capabilities2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD); + ena2 |= mena2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD); + ifp->if_ipsec_accel_m = &vlan_if_ipsec_accel_methods; +#endif + + ifp->if_capabilities2 = cap2; + ifp->if_capenable2 = ena2; } static void diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 4d948c08e5c7..d1c94696c948 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -2571,11 +2571,12 @@ u_short pf_map_addr(u_int8_t, struct pf_krule *, struct pf_addr *, struct pf_addr *, struct pfi_kkif **nkif, struct pf_addr *, struct pf_ksrc_node **); -struct pf_krule *pf_get_translation(struct pf_pdesc *, struct mbuf *, +u_short pf_get_translation(struct pf_pdesc *, struct mbuf *, int, struct pfi_kkif *, struct pf_ksrc_node **, struct pf_state_key **, struct pf_state_key **, struct pf_addr *, struct pf_addr *, - uint16_t, 
uint16_t, struct pf_kanchor_stackframe *); + uint16_t, uint16_t, struct pf_kanchor_stackframe *, + struct pf_krule **); struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 49d7a352372e..ad94aacb5032 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -234,6 +234,12 @@ in_pcbhashseed_init(void) VNET_SYSINIT(in_pcbhashseed_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, in_pcbhashseed_init, 0); +VNET_DEFINE_STATIC(int, connect_inaddr_wild) = 1; +#define V_connect_inaddr_wild VNET(connect_inaddr_wild) +SYSCTL_INT(_net_inet_ip, OID_AUTO, connect_inaddr_wild, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(connect_inaddr_wild), 0, + "Allow connecting to INADDR_ANY or INADDR_BROADCAST for connect(2)"); + static void in_pcbremhash(struct inpcb *); /* @@ -1309,7 +1315,7 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr_in *sin, inp->inp_flowtype = hash_type; } #endif - if (!CK_STAILQ_EMPTY(&V_in_ifaddrhead)) { + if (V_connect_inaddr_wild && !CK_STAILQ_EMPTY(&V_in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, * use the primary local address. 
diff --git a/sys/netinet/libalias/alias_db.c b/sys/netinet/libalias/alias_db.c index 167201fa1b8f..4bb95549aaaf 100644 --- a/sys/netinet/libalias/alias_db.c +++ b/sys/netinet/libalias/alias_db.c @@ -868,8 +868,18 @@ _FindLinkIn(struct libalias *la, struct in_addr dst_addr, case 0: LIST_FOREACH(lnk, &grp->full, all.in) { if (lnk->dst_addr.s_addr == dst_addr.s_addr && - lnk->dst_port == dst_port) - return (UseLink(la, lnk)); + lnk->dst_port == dst_port) { + struct alias_link *found; + + found = UseLink(la, lnk); + if (found != NULL) + return (found); + /* link expired */ + grp = StartPointIn(la, alias_addr, alias_port, link_type, 0); + if (grp == NULL) + return (NULL); + break; + } } break; case LINK_UNKNOWN_DEST_PORT: diff --git a/sys/netinet/sctp_sysctl.c b/sys/netinet/sctp_sysctl.c index a4be3471e2fd..a39429ec046e 100644 --- a/sys/netinet/sctp_sysctl.c +++ b/sys/netinet/sctp_sysctl.c @@ -894,7 +894,7 @@ sctp_sysctl_handle_trace_log_clear(SYSCTL_HANDLER_ARGS) return (error); \ } \ SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mib_name, flags, NULL, 0, \ - sctp_sysctl_handle_##mib_name, "UI", prefix##_DESC) + sctp_sysctl_handle_##mib_name, "IU", prefix##_DESC) #define SCTP_UINT_SYSCTL_RDTUN(mib_name, var_name, prefix) \ SYSCTL_UINT(_net_inet_sctp, OID_AUTO, mib_name, \ diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 921d28f82517..906e01257a04 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -175,7 +175,7 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp, { struct lro_entry *le; size_t size; - unsigned i, elements; + unsigned i; lc->lro_bad_csum = 0; lc->lro_queued = 0; @@ -190,11 +190,7 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp, LIST_INIT(&lc->lro_active); /* create hash table to accelerate entry lookup */ - if (lro_entries > lro_mbufs) - elements = lro_entries; - else - elements = lro_mbufs; - lc->lro_hash = phashinit_flags(elements, M_LRO, &lc->lro_hashsz, + lc->lro_hash = phashinit_flags(lro_entries, M_LRO, 
&lc->lro_hashsz, HASH_NOWAIT); if (lc->lro_hash == NULL) { memset(lc, 0, sizeof(*lc)); @@ -599,7 +595,7 @@ tcp_lro_rx_done(struct lro_ctrl *lc) static void tcp_lro_flush_active(struct lro_ctrl *lc) { - struct lro_entry *le; + struct lro_entry *le, *le_tmp; /* * Walk through the list of le entries, and @@ -611,7 +607,7 @@ tcp_lro_flush_active(struct lro_ctrl *lc) * is being freed. This is ok it will just get * reallocated again like it was new. */ - LIST_FOREACH(le, &lc->lro_active, next) { + LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) { if (le->m_head != NULL) { tcp_lro_active_remove(le); tcp_lro_flush(lc, le); diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 62a4a5a28878..33a6a66b7138 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -207,7 +207,7 @@ sysctl_net_inet_tcp_syncache_rexmtlimit_check(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &VNET_NAME(tcp_syncache.rexmt_limit), 0, - sysctl_net_inet_tcp_syncache_rexmtlimit_check, "UI", + sysctl_net_inet_tcp_syncache_rexmtlimit_check, "IU", "Limit on SYN/ACK retransmissions"); VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1; @@ -1720,9 +1720,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, * Do a standard 3-way handshake. 
*/ if (syncache_respond(sc, m, TH_SYN|TH_ACK) == 0) { - if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs) - syncache_free(sc); - else if (sc != &scs) + if (sc != &scs) syncache_insert(sc, sch); /* locks and unlocks sch */ TCPSTAT_INC(tcps_sndacks); TCPSTAT_INC(tcps_sndtotal); diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index e6ec0f24c898..098b4e50483c 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -83,6 +83,7 @@ #include #include #include +#include #include #include #include @@ -97,6 +98,7 @@ #include #include #include +#include #include #include @@ -112,6 +114,14 @@ #include #include +SYSCTL_DECL(_net_inet6); +SYSCTL_DECL(_net_inet6_ip6); +VNET_DEFINE_STATIC(int, connect_in6addr_wild) = 1; +#define V_connect_in6addr_wild VNET(connect_in6addr_wild) +SYSCTL_INT(_net_inet6_ip6, OID_AUTO, connect_in6addr_wild, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(connect_in6addr_wild), 0, + "Allow connecting to the unspecified address for connect(2)"); + int in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) { @@ -351,7 +361,7 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr_in6 *sin6, if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return(error); - if (!CK_STAILQ_EMPTY(&V_in6_ifaddrhead)) { + if (V_connect_in6addr_wild && !CK_STAILQ_EMPTY(&V_in6_ifaddrhead)) { /* * If the destination address is UNSPECIFIED addr, * use the loopback addr, e.g ::1. 
diff --git a/sys/netipsec/ipsec_offload.c b/sys/netipsec/ipsec_offload.c index 984134539d8b..bbf98ac7a676 100644 --- a/sys/netipsec/ipsec_offload.c +++ b/sys/netipsec/ipsec_offload.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -138,6 +139,8 @@ PCTRIE_DEFINE(DRVSPI_SA, ifp_handle_sav, drv_spi, drvspi_sa_trie_alloc, drvspi_sa_trie_free); static struct pctrie drv_spi_pctrie; +static eventhandler_tag ipsec_accel_ifdetach_event_tag; + static void ipsec_accel_sa_newkey_impl(struct secasvar *sav); static int ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires); @@ -154,6 +157,9 @@ static struct mbuf *ipsec_accel_key_setaccelif_impl(struct secasvar *sav); static void ipsec_accel_on_ifdown_impl(struct ifnet *ifp); static void ipsec_accel_drv_sa_lifetime_update_impl(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t octets, uint64_t allocs); +static int ipsec_accel_drv_sa_lifetime_fetch_impl(struct secasvar *sav, + if_t ifp, u_int drv_spi, uint64_t *octets, uint64_t *allocs); +static void ipsec_accel_ifdetach_event(void *arg, struct ifnet *ifp); static void ipsec_accel_init(void *arg) @@ -173,7 +179,12 @@ ipsec_accel_init(void *arg) ipsec_accel_on_ifdown_p = ipsec_accel_on_ifdown_impl; ipsec_accel_drv_sa_lifetime_update_p = ipsec_accel_drv_sa_lifetime_update_impl; + ipsec_accel_drv_sa_lifetime_fetch_p = + ipsec_accel_drv_sa_lifetime_fetch_impl; pctrie_init(&drv_spi_pctrie); + ipsec_accel_ifdetach_event_tag = EVENTHANDLER_REGISTER( + ifnet_departure_event, ipsec_accel_ifdetach_event, NULL, + EVENTHANDLER_PRI_ANY); } SYSINIT(ipsec_accel_init, SI_SUB_VNET_DONE, SI_ORDER_ANY, ipsec_accel_init, NULL); @@ -181,6 +192,8 @@ SYSINIT(ipsec_accel_init, SI_SUB_VNET_DONE, SI_ORDER_ANY, static void ipsec_accel_fini(void *arg) { + EVENTHANDLER_DEREGISTER(ifnet_departure_event, + ipsec_accel_ifdetach_event_tag); ipsec_accel_sa_newkey_p = NULL; 
ipsec_accel_forget_sav_p = NULL; ipsec_accel_spdadd_p = NULL; @@ -191,6 +204,7 @@ ipsec_accel_fini(void *arg) ipsec_accel_key_setaccelif_p = NULL; ipsec_accel_on_ifdown_p = NULL; ipsec_accel_drv_sa_lifetime_update_p = NULL; + ipsec_accel_drv_sa_lifetime_fetch_p = NULL; ipsec_accel_sync_imp(); clean_unrhdr(drv_spi_unr); /* avoid panic, should go later */ clear_unrhdr(drv_spi_unr); @@ -799,6 +813,14 @@ ipsec_accel_on_ifdown_impl(struct ifnet *ifp) ipsec_accel_on_ifdown_sav(ifp); } +static void +ipsec_accel_ifdetach_event(void *arg __unused, struct ifnet *ifp) +{ + if ((ifp->if_flags & IFF_RENAMING) != 0) + return; + ipsec_accel_on_ifdown_impl(ifp); +} + static bool ipsec_accel_output_pad(struct mbuf *m, struct secasvar *sav, int skip, int mtu) { @@ -1000,6 +1022,30 @@ ipsec_accel_drv_sa_lifetime_update_impl(struct secasvar *sav, if_t ifp, NET_EPOCH_EXIT(et); } +static int +ipsec_accel_drv_sa_lifetime_fetch_impl(struct secasvar *sav, + if_t ifp, u_int drv_spi, uint64_t *octets, uint64_t *allocs) +{ + struct ifp_handle_sav *i; + int error; + + NET_EPOCH_ASSERT(); + error = 0; + + mtx_lock(&ipsec_accel_cnt_lock); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp && i->drv_spi == drv_spi) { + *octets = i->cnt_octets; + *allocs = i->cnt_allocs; + break; + } + } + if (i == NULL) + error = ENOENT; + mtx_unlock(&ipsec_accel_cnt_lock); + return (error); +} + static void ipsec_accel_sa_lifetime_hw(struct secasvar *sav, if_t ifp, struct seclifetime *lft) diff --git a/sys/netipsec/ipsec_offload.h b/sys/netipsec/ipsec_offload.h index 411aa1ea67c5..833281cd4aaa 100644 --- a/sys/netipsec/ipsec_offload.h +++ b/sys/netipsec/ipsec_offload.h @@ -64,6 +64,8 @@ extern struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); extern void (*ipsec_accel_on_ifdown_p)(struct ifnet *ifp); extern void (*ipsec_accel_drv_sa_lifetime_update_p)(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t octets, uint64_t allocs); +extern int 
(*ipsec_accel_drv_sa_lifetime_fetch_p)(struct secasvar *sav, + if_t ifp, u_int drv_spi, uint64_t *octets, uint64_t *allocs); #ifdef IPSEC_OFFLOAD /* @@ -191,6 +193,8 @@ struct ipsec_accel_in_tag *ipsec_accel_input_tag_lookup(const struct mbuf *); void ipsec_accel_on_ifdown(struct ifnet *ifp); void ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t octets, uint64_t allocs); +int ipsec_accel_drv_sa_lifetime_fetch(struct secasvar *sav, + if_t ifp, u_int drv_spi, uint64_t *octets, uint64_t *allocs); #endif /* _KERNEL */ diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index 149173e0b5f6..5a3e5727bc2e 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -112,6 +112,8 @@ struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); void (*ipsec_accel_on_ifdown_p)(struct ifnet *ifp); void (*ipsec_accel_drv_sa_lifetime_update_p)(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t octets, uint64_t allocs); +int (*ipsec_accel_drv_sa_lifetime_fetch_p)(struct secasvar *sav, if_t ifp, + u_int drv_spi, uint64_t *octets, uint64_t *allocs); #endif #define FULLMASK 0xff @@ -8990,4 +8992,17 @@ ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, if (p != NULL) p(sav, ifp, drv_spi, octets, allocs); } + +int +ipsec_accel_drv_sa_lifetime_fetch(struct secasvar *sav, + if_t ifp, u_int drv_spi, uint64_t *octets, uint64_t *allocs) +{ + int (*p)(struct secasvar *sav, if_t ifp, u_int drv_spi, + uint64_t *octets, uint64_t *allocs); + + p = atomic_load_ptr(&ipsec_accel_drv_sa_lifetime_fetch_p); + if (p == NULL) + return (EOPNOTSUPP); + return (p(sav, ifp, drv_spi, octets, allocs)); +} #endif diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 0547e29e04c2..cb69d06b1fe6 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -4365,6 +4365,13 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0) r = TAILQ_FIRST(rules); rm = NULL; + if (__predict_false(m->m_len < sizeof(struct ether_header)) 
&& + (m = *m0 = m_pullup(*m0, sizeof(struct ether_header))) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test_eth_rule: m_len < sizeof(struct ether_header)" + ", pullup failed\n")); + return (PF_DROP); + } e = mtod(m, struct ether_header *); proto = ntohs(e->ether_type); @@ -4605,7 +4612,7 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif, struct pf_ksrc_node *nsn = NULL; struct tcphdr *th = &pd->hdr.tcp; struct pf_state_key *sk = NULL, *nk = NULL; - u_short reason; + u_short reason, transerror; int rewrite = 0, hdrlen = 0; int tag = -1; int asd = 0; @@ -4618,6 +4625,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif, PF_RULES_RASSERT(); + SLIST_INIT(&match_rules); + if (inp != NULL) { INP_LOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; @@ -4686,8 +4695,17 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); /* check packet for BINAT/NAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, kif, &nsn, &sk, - &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) { + transerror = pf_get_translation(pd, m, off, kif, &nsn, &sk, + &nk, saddr, daddr, sport, dport, anchor_stack, &nr); + switch (transerror) { + default: + /* A translation error occurred. */ + REASON_SET(&reason, transerror); + goto cleanup; + case PFRES_MAX: + /* No match. 
*/ + break; + case PFRES_MATCH: KASSERT(sk != NULL, ("%s: null sk", __func__)); KASSERT(nk != NULL, ("%s: null nk", __func__)); @@ -4836,7 +4854,6 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif, pd->nat_rule = nr; } - SLIST_INIT(&match_rules); while (r != NULL) { pf_counter_u64_add(&r->evaluations, 1); if (pfi_kkif_match(r->kif, kif) == r->ifnot) @@ -8344,7 +8361,7 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, pd.af = AF_INET; pd.act.rtableid = -1; - if (m->m_len < sizeof(struct ip) && + if (__predict_false(m->m_len < sizeof(struct ip)) && (m = *m0 = m_pullup(*m0, sizeof(struct ip))) == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: m_len < sizeof(struct ip), pullup failed\n")); @@ -8936,6 +8953,14 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb pd.af = AF_INET6; pd.act.rtableid = -1; + if (__predict_false(m->m_len < sizeof(struct ip6_hdr)) && + (m = *m0 = m_pullup(*m0, sizeof(struct ip6_hdr))) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: m_len < sizeof(struct ip6_hdr)" + ", pullup failed\n")); + PF_RULES_RUNLOCK(); + return (PF_DROP); + } h = mtod(m, struct ip6_hdr *); off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 4fcad7e578a8..68fc76233dab 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -591,22 +591,26 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, return (reason); } -struct pf_krule * +u_short pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, struct pfi_kkif *kif, struct pf_ksrc_node **sn, struct pf_state_key **skp, struct pf_state_key **nkp, struct pf_addr *saddr, struct pf_addr *daddr, - uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack) + uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack, + struct pf_krule **rp) { struct pf_krule *r = NULL; struct pf_addr *naddr; - uint16_t 
*nport; + uint16_t *nportp; uint16_t low, high; + u_short reason; PF_RULES_RASSERT(); KASSERT(*skp == NULL, ("*skp not NULL")); KASSERT(*nkp == NULL, ("*nkp not NULL")); + *rp = NULL; + if (pd->dir == PF_OUT) { r = pf_match_translation(pd, m, off, kif, saddr, sport, daddr, dport, PF_RULESET_BINAT, anchor_stack); @@ -624,28 +628,27 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, } if (r == NULL) - return (NULL); + return (PFRES_MAX); switch (r->action) { case PF_NONAT: case PF_NOBINAT: case PF_NORDR: - return (NULL); + return (PFRES_MAX); } *skp = pf_state_key_setup(pd, saddr, daddr, sport, dport); if (*skp == NULL) - return (NULL); + return (PFRES_MEMORY); *nkp = pf_state_key_clone(*skp); if (*nkp == NULL) { uma_zfree(V_pf_state_key_z, *skp); *skp = NULL; - return (NULL); + return (PFRES_MEMORY); } - /* XXX We only modify one side for now. */ naddr = &(*nkp)->addr[1]; - nport = &(*nkp)->port[1]; + nportp = &(*nkp)->port[1]; switch (r->action) { case PF_NAT: @@ -658,20 +661,22 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, } if (r->rpool.mape.offset > 0) { if (pf_get_mape_sport(pd->af, pd->proto, r, saddr, - sport, daddr, dport, naddr, nport, sn)) { + sport, daddr, dport, naddr, nportp, sn)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: MAP-E port allocation (%u/%u/%u)" " failed\n", r->rpool.mape.offset, r->rpool.mape.psidlen, r->rpool.mape.psid)); + reason = PFRES_MAPFAILED; goto notrans; } } else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport, - daddr, dport, naddr, nport, low, high, sn)) { + daddr, dport, naddr, nportp, low, high, sn)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: NAT proxy port allocation (%u-%u) failed\n", r->rpool.proxy_port[0], r->rpool.proxy_port[1])); + reason = PFRES_MAPFAILED; goto notrans; } break; @@ -683,8 +688,10 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, #ifdef INET case AF_INET: if (r->rpool.cur->addr.p.dyn-> - pfid_acnt4 < 1) + pfid_acnt4 < 1) { + reason = PFRES_MAPFAILED; goto notrans; 
+ } PF_POOLMASK(naddr, &r->rpool.cur->addr.p.dyn-> pfid_addr4, @@ -695,8 +702,10 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, #ifdef INET6 case AF_INET6: if (r->rpool.cur->addr.p.dyn-> - pfid_acnt6 < 1) + pfid_acnt6 < 1) { + reason = PFRES_MAPFAILED; goto notrans; + } PF_POOLMASK(naddr, &r->rpool.cur->addr.p.dyn-> pfid_addr6, @@ -716,8 +725,10 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, switch (pd->af) { #ifdef INET case AF_INET: - if (r->src.addr.p.dyn-> pfid_acnt4 < 1) + if (r->src.addr.p.dyn->pfid_acnt4 < 1) { + reason = PFRES_MAPFAILED; goto notrans; + } PF_POOLMASK(naddr, &r->src.addr.p.dyn->pfid_addr4, &r->src.addr.p.dyn->pfid_mask4, @@ -726,8 +737,10 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, #endif /* INET */ #ifdef INET6 case AF_INET6: - if (r->src.addr.p.dyn->pfid_acnt6 < 1) + if (r->src.addr.p.dyn->pfid_acnt6 < 1) { + reason = PFRES_MAPFAILED; goto notrans; + } PF_POOLMASK(naddr, &r->src.addr.p.dyn->pfid_addr6, &r->src.addr.p.dyn->pfid_mask6, @@ -742,7 +755,11 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, } break; case PF_RDR: { - if (pf_map_addr(pd->af, r, saddr, naddr, NULL, NULL, sn)) + struct pf_state_key_cmp key; + uint16_t cut, low, high, nport; + + reason = pf_map_addr(pd->af, r, saddr, naddr, NULL, NULL, sn); + if (reason != 0) goto notrans; if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask, @@ -762,9 +779,64 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, /* Wrap around if necessary. */ if (tmp_nport > 65535) tmp_nport -= 65535; - *nport = htons((uint16_t)tmp_nport); + nport = htons((uint16_t)tmp_nport); } else if (r->rpool.proxy_port[0]) - *nport = htons(r->rpool.proxy_port[0]); + nport = htons(r->rpool.proxy_port[0]); + else + nport = dport; + + /* + * Update the destination port. + */ + *nportp = nport; + + /* + * Do we have a source port conflict in the stack state? 
Try to + * modulate the source port if so. Note that this is racy since + * the state lookup may not find any matches here but will once + * pf_create_state() actually instantiates the state. + */ + bzero(&key, sizeof(key)); + key.af = pd->af; + key.proto = pd->proto; + key.port[0] = sport; + PF_ACPY(&key.addr[0], saddr, key.af); + key.port[1] = nport; + PF_ACPY(&key.addr[1], naddr, key.af); + + if (!pf_find_state_all_exists(&key, PF_OUT)) + break; + + low = 50001; /* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */ + high = 65535; + cut = arc4random() % (1 + high - low) + low; + for (uint32_t tmp = cut; + tmp <= high && tmp <= UINT16_MAX; tmp++) { + key.port[0] = htons(tmp); + if (!pf_find_state_all_exists(&key, PF_OUT)) { + /* Update the source port. */ + (*nkp)->port[0] = htons(tmp); + goto out; + } + } + for (uint32_t tmp = cut - 1; tmp >= low; tmp--) { + key.port[0] = htons(tmp); + if (!pf_find_state_all_exists(&key, PF_OUT)) { + /* Update the source port. */ + (*nkp)->port[0] = htons(tmp); + goto out; + } + } + + DPFPRINTF(PF_DEBUG_MISC, + ("pf: RDR source port allocation failed\n")); + reason = PFRES_MAPFAILED; + goto notrans; + +out: + DPFPRINTF(PF_DEBUG_MISC, + ("pf: RDR source port allocation %u->%u\n", + ntohs(sport), ntohs((*nkp)->port[0]))); break; } default: @@ -772,14 +844,17 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, } /* Return success only if translation really happened. 
*/ - if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) - return (r); + if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) { + *rp = r; + return (PFRES_MATCH); + } + reason = PFRES_MAX; notrans: uma_zfree(V_pf_state_key_z, *nkp); uma_zfree(V_pf_state_key_z, *skp); *skp = *nkp = NULL; *sn = NULL; - return (NULL); + return (reason); } diff --git a/sys/riscv/include/elf.h b/sys/riscv/include/elf.h index 905e876832a0..c2fe70b0551a 100644 --- a/sys/riscv/include/elf.h +++ b/sys/riscv/include/elf.h @@ -112,5 +112,6 @@ __ElfType(Auxinfo); #define HWCAP_ISA_C HWCAP_ISA_BIT('c') #define HWCAP_ISA_G \ (HWCAP_ISA_I | HWCAP_ISA_M | HWCAP_ISA_A | HWCAP_ISA_F | HWCAP_ISA_D) +#define HWCAP_ISA_B HWCAP_ISA_BIT('b') #endif /* !_MACHINE_ELF_H_ */ diff --git a/sys/riscv/include/ifunc.h b/sys/riscv/include/ifunc.h new file mode 100644 index 000000000000..0f9747a2aa14 --- /dev/null +++ b/sys/riscv/include/ifunc.h @@ -0,0 +1,49 @@ +/*- + * Copyright (c) 2015-2018 The FreeBSD Foundation + * Copyright (c) 2024 Jessica Clarke + * + * Part of this software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __RISCV_IFUNC_H +#define __RISCV_IFUNC_H + +#define DEFINE_IFUNC(qual, ret_type, name, args) \ + static ret_type (*name##_resolver(void))args __used; \ + qual ret_type name args __attribute__((ifunc(#name "_resolver"))); \ + static ret_type (*name##_resolver(void))args + +#define DEFINE_UIFUNC(qual, ret_type, name, args) \ + static ret_type (*name##_resolver(unsigned long, unsigned long, \ + unsigned long, unsigned long, unsigned long, unsigned long, \ + unsigned long, unsigned long))args __used; \ + qual ret_type name args __attribute__((ifunc(#name "_resolver"))); \ + static ret_type (*name##_resolver(unsigned long elf_hwcap __unused, \ + unsigned long _arg2 __unused, unsigned long _arg3 __unused, \ + unsigned long _arg4 __unused, unsigned long _arg5 __unused, \ + unsigned long _arg6 __unused, unsigned long _arg7 __unused, \ + unsigned long _arg8 __unused))args + +#endif diff --git a/sys/riscv/riscv/identcpu.c b/sys/riscv/riscv/identcpu.c index c681edfff47b..203edb3689bc 100644 --- a/sys/riscv/riscv/identcpu.c +++ b/sys/riscv/riscv/identcpu.c @@ -245,6 +245,7 @@ parse_riscv_isa(struct cpu_desc *desc, char *isa, int len) while (i < len) { switch(isa[i]) { case 'a': + case 'b': case 'c': case 'd': case 'f': diff --git a/sys/sys/buf_ring.h b/sys/sys/buf_ring.h index cb18175c3a75..c99cf81d8b6d 100644 --- a/sys/sys/buf_ring.h +++ b/sys/sys/buf_ring.h @@ -3,6 +3,7 @@ * * Copyright (c) 2007-2009 Kip Macy * All rights reserved. 
+ * Copyright (c) 2024 Arm Ltd * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -35,15 +36,20 @@ #include #include -#ifdef DEBUG_BUFRING -#ifdef _KERNEL +#if defined(DEBUG_BUFRING) && defined(_KERNEL) #include #include -#else -#error "DEBUG_BUFRING is only supported in kernel" -#endif #endif +/* + * We only apply the mask to the head and tail values when calculating the + * index into br_ring to access. This means the upper bits can be used as + * epoch to reduce the chance the atomic_cmpset succeeds when it should + * fail, e.g. when the head wraps while the CPU is in an interrupt. This + * is a probabilistic fix as there is still a very unlikely chance the + * value wraps back to the expected value. + * + */ struct buf_ring { volatile uint32_t br_prod_head; volatile uint32_t br_prod_tail; @@ -54,7 +60,7 @@ struct buf_ring { volatile uint32_t br_cons_tail; int br_cons_size; int br_cons_mask; -#ifdef DEBUG_BUFRING +#if defined(DEBUG_BUFRING) && defined(_KERNEL) struct mtx *br_lock; #endif void *br_ring[0] __aligned(CACHE_LINE_SIZE); @@ -67,28 +73,38 @@ struct buf_ring { static __inline int buf_ring_enqueue(struct buf_ring *br, void *buf) { - uint32_t prod_head, prod_next, cons_tail; -#ifdef DEBUG_BUFRING - int i; + uint32_t prod_head, prod_next, prod_idx; + uint32_t cons_tail, mask; + mask = br->br_prod_mask; +#ifdef DEBUG_BUFRING /* * Note: It is possible to encounter an mbuf that was removed * via drbr_peek(), and then re-added via drbr_putback() and * trigger a spurious panic.
*/ - for (i = br->br_cons_head; i != br->br_prod_head; - i = ((i + 1) & br->br_cons_mask)) - if (br->br_ring[i] == buf) + for (uint32_t i = br->br_cons_head; i != br->br_prod_head; i++) + if (br->br_ring[i & mask] == buf) panic("buf=%p already enqueue at %d prod=%d cons=%d", buf, i, br->br_prod_tail, br->br_cons_tail); #endif critical_enter(); do { - prod_head = br->br_prod_head; - prod_next = (prod_head + 1) & br->br_prod_mask; - cons_tail = br->br_cons_tail; - - if (prod_next == cons_tail) { + /* + * br->br_prod_head needs to be read before br->br_cons_tail. + * If not then we could perform the dequeue and enqueue + * between reading br_cons_tail and reading br_prod_head. This + * could give us values where br_cons_head == br_prod_tail + * (after masking). + * + * To work around this use a load acquire. This is just to + * ensure ordering within this thread. + */ + prod_head = atomic_load_acq_32(&br->br_prod_head); + prod_next = prod_head + 1; + cons_tail = atomic_load_acq_32(&br->br_cons_tail); + + if ((int32_t)(cons_tail + br->br_prod_size - prod_next) < 1) { rmb(); if (prod_head == br->br_prod_head && cons_tail == br->br_cons_tail) { @@ -98,12 +114,13 @@ buf_ring_enqueue(struct buf_ring *br, void *buf) } continue; } - } while (!atomic_cmpset_acq_int(&br->br_prod_head, prod_head, prod_next)); + } while (!atomic_cmpset_acq_32(&br->br_prod_head, prod_head, prod_next)); + prod_idx = prod_head & mask; #ifdef DEBUG_BUFRING - if (br->br_ring[prod_head] != NULL) + if (br->br_ring[prod_idx] != NULL) panic("dangling value in enqueue"); #endif - br->br_ring[prod_head] = buf; + br->br_ring[prod_idx] = buf; /* * If there are other enqueues in progress @@ -112,7 +129,7 @@ buf_ring_enqueue(struct buf_ring *br, void *buf) */ while (br->br_prod_tail != prod_head) cpu_spinwait(); - atomic_store_rel_int(&br->br_prod_tail, prod_next); + atomic_store_rel_32(&br->br_prod_tail, prod_next); critical_exit(); return (0); } @@ -124,23 +141,32 @@ buf_ring_enqueue(struct buf_ring *br, void
*buf) static __inline void * buf_ring_dequeue_mc(struct buf_ring *br) { - uint32_t cons_head, cons_next; + uint32_t cons_head, cons_next, cons_idx; + uint32_t prod_tail, mask; void *buf; critical_enter(); + mask = br->br_cons_mask; do { - cons_head = br->br_cons_head; - cons_next = (cons_head + 1) & br->br_cons_mask; - - if (cons_head == br->br_prod_tail) { + /* + * As with buf_ring_enqueue ensure we read the head before + * the tail. If we read them in the wrong order we may + * think the buf_ring is full when it is empty. + */ + cons_head = atomic_load_acq_32(&br->br_cons_head); + cons_next = cons_head + 1; + prod_tail = atomic_load_acq_32(&br->br_prod_tail); + + if (cons_head == prod_tail) { critical_exit(); return (NULL); } - } while (!atomic_cmpset_acq_int(&br->br_cons_head, cons_head, cons_next)); + } while (!atomic_cmpset_acq_32(&br->br_cons_head, cons_head, cons_next)); + cons_idx = cons_head & mask; - buf = br->br_ring[cons_head]; + buf = br->br_ring[cons_idx]; #ifdef DEBUG_BUFRING - br->br_ring[cons_head] = NULL; + br->br_ring[cons_idx] = NULL; #endif /* * If there are other dequeues in progress @@ -150,7 +176,7 @@ buf_ring_dequeue_mc(struct buf_ring *br) while (br->br_cons_tail != cons_head) cpu_spinwait(); - atomic_store_rel_int(&br->br_cons_tail, cons_next); + atomic_store_rel_32(&br->br_cons_tail, cons_next); critical_exit(); return (buf); @@ -164,72 +190,34 @@ buf_ring_dequeue_sc(struct buf_ring *br) static __inline void * buf_ring_dequeue_sc(struct buf_ring *br) { - uint32_t cons_head, cons_next; -#ifdef PREFETCH_DEFINED - uint32_t cons_next_next; -#endif - uint32_t prod_tail; + uint32_t cons_head, cons_next, cons_idx; + uint32_t prod_tail, mask; void *buf; - /* - * This is a workaround to allow using buf_ring on ARM and ARM64. - * ARM64TODO: Fix buf_ring in a generic way. - * REMARKS: It is suspected that br_cons_head does not require - * load_acq operation, but this change was extensively tested - * and confirmed it's working.
To be reviewed once again in - * FreeBSD-12. - * - * Preventing following situation: - - * Core(0) - buf_ring_enqueue() Core(1) - buf_ring_dequeue_sc() - * ----------------------------------------- ---------------------------------------------- - * - * cons_head = br->br_cons_head; - * atomic_cmpset_acq_32(&br->br_prod_head, ...)); - * buf = br->br_ring[cons_head]; > - * br->br_ring[prod_head] = buf; - * atomic_store_rel_32(&br->br_prod_tail, ...); - * prod_tail = br->br_prod_tail; - * if (cons_head == prod_tail) - * return (NULL); - * ` - * - * <1> Load (on core 1) from br->br_ring[cons_head] can be reordered (speculative readed) by CPU. - */ -#if defined(__arm__) || defined(__aarch64__) - cons_head = atomic_load_acq_32(&br->br_cons_head); -#else + mask = br->br_cons_mask; cons_head = br->br_cons_head; -#endif prod_tail = atomic_load_acq_32(&br->br_prod_tail); - cons_next = (cons_head + 1) & br->br_cons_mask; -#ifdef PREFETCH_DEFINED - cons_next_next = (cons_head + 2) & br->br_cons_mask; -#endif + cons_next = cons_head + 1; - if (cons_head == prod_tail) + if (cons_head == prod_tail) return (NULL); -#ifdef PREFETCH_DEFINED - if (cons_next != prod_tail) { - prefetch(br->br_ring[cons_next]); - if (cons_next_next != prod_tail) - prefetch(br->br_ring[cons_next_next]); - } -#endif + cons_idx = cons_head & mask; br->br_cons_head = cons_next; - buf = br->br_ring[cons_head]; + buf = br->br_ring[cons_idx]; #ifdef DEBUG_BUFRING - br->br_ring[cons_head] = NULL; + br->br_ring[cons_idx] = NULL; +#ifdef _KERNEL if (!mtx_owned(br->br_lock)) panic("lock not held on single consumer dequeue"); +#endif if (br->br_cons_tail != cons_head) panic("inconsistent list cons_tail=%d cons_head=%d", br->br_cons_tail, cons_head); #endif - br->br_cons_tail = cons_next; + atomic_store_rel_32(&br->br_cons_tail, cons_next); return (buf); } @@ -241,20 +229,23 @@ buf_ring_dequeue_sc(struct buf_ring *br) static __inline void buf_ring_advance_sc(struct buf_ring *br) { - uint32_t cons_head, cons_next; - 
uint32_t prod_tail; + uint32_t cons_head, cons_next, prod_tail; +#ifdef DEBUG_BUFRING + uint32_t mask; + mask = br->br_cons_mask; +#endif cons_head = br->br_cons_head; prod_tail = br->br_prod_tail; - cons_next = (cons_head + 1) & br->br_cons_mask; - if (cons_head == prod_tail) + cons_next = cons_head + 1; + if (cons_head == prod_tail) return; br->br_cons_head = cons_next; #ifdef DEBUG_BUFRING - br->br_ring[cons_head] = NULL; + br->br_ring[cons_head & mask] = NULL; #endif - br->br_cons_tail = cons_next; + atomic_store_rel_32(&br->br_cons_tail, cons_next); } /* @@ -276,9 +267,12 @@ buf_ring_advance_sc(struct buf_ring *br) static __inline void buf_ring_putback_sc(struct buf_ring *br, void *new) { - KASSERT(br->br_cons_head != br->br_prod_tail, + uint32_t mask; + + mask = br->br_cons_mask; + KASSERT((br->br_cons_head & mask) != (br->br_prod_tail & mask), ("Buf-Ring has none in putback")) ; - br->br_ring[br->br_cons_head] = new; + br->br_ring[br->br_cons_head & mask] = new; } /* @@ -289,68 +283,56 @@ buf_ring_putback_sc(struct buf_ring *br, void *new) static __inline void * buf_ring_peek(struct buf_ring *br) { + uint32_t cons_head, prod_tail, mask; -#ifdef DEBUG_BUFRING +#if defined(DEBUG_BUFRING) && defined(_KERNEL) if ((br->br_lock != NULL) && !mtx_owned(br->br_lock)) panic("lock not held on single consumer dequeue"); #endif - /* - * I believe it is safe to not have a memory barrier - * here because we control cons and tail is worst case - * a lagging indicator so we worst case we might - * return NULL immediately after a buffer has been enqueued - */ - if (br->br_cons_head == br->br_prod_tail) + mask = br->br_cons_mask; + prod_tail = atomic_load_acq_32(&br->br_prod_tail); + cons_head = br->br_cons_head; + + if (cons_head == prod_tail) return (NULL); - return (br->br_ring[br->br_cons_head]); + return (br->br_ring[cons_head & mask]); } static __inline void * buf_ring_peek_clear_sc(struct buf_ring *br) { -#ifdef DEBUG_BUFRING + uint32_t cons_head, prod_tail, mask; void 
*ret; +#if defined(DEBUG_BUFRING) && defined(_KERNEL) if (!mtx_owned(br->br_lock)) panic("lock not held on single consumer dequeue"); #endif - if (br->br_cons_head == br->br_prod_tail) - return (NULL); + mask = br->br_cons_mask; + prod_tail = atomic_load_acq_32(&br->br_prod_tail); + cons_head = br->br_cons_head; -#if defined(__arm__) || defined(__aarch64__) - /* - * The barrier is required there on ARM and ARM64 to ensure, that - * br->br_ring[br->br_cons_head] will not be fetched before the above - * condition is checked. - * Without the barrier, it is possible, that buffer will be fetched - * before the enqueue will put mbuf into br, then, in the meantime, the - * enqueue will update the array and the br_prod_tail, and the - * conditional check will be true, so we will return previously fetched - * (and invalid) buffer. - */ - atomic_thread_fence_acq(); -#endif + if (cons_head == prod_tail) + return (NULL); + ret = br->br_ring[cons_head & mask]; #ifdef DEBUG_BUFRING /* * Single consumer, i.e. cons_head will not move while we are * running, so atomic_swap_ptr() is not necessary here. 
*/ - ret = br->br_ring[br->br_cons_head]; - br->br_ring[br->br_cons_head] = NULL; - return (ret); -#else - return (br->br_ring[br->br_cons_head]); + br->br_ring[cons_head & mask] = NULL; #endif + return (ret); } static __inline int buf_ring_full(struct buf_ring *br) { - return (((br->br_prod_head + 1) & br->br_prod_mask) == br->br_cons_tail); + return (br->br_prod_head == br->br_cons_tail + br->br_cons_size - 1); } static __inline int diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index 4f8651411851..a5be05efc6d9 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -262,13 +262,25 @@ void cdev_pager_free_page(vm_object_t object, vm_page_t m) { - VM_OBJECT_ASSERT_WLOCKED(object); - if (object->type == OBJT_MGTDEVICE) { - KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m)); - pmap_remove_all(m); - (void)vm_page_remove(m); - } else if (object->type == OBJT_DEVICE) + if (object->type == OBJT_MGTDEVICE) + cdev_mgtdev_pager_free_page(object, m); + else if (object->type == OBJT_DEVICE) dev_pager_free_page(object, m); + else + KASSERT(false, + ("Invalid device type obj %p m %p", object, m)); +} + +void +cdev_mgtdev_pager_free_page(vm_object_t object, vm_page_t m) +{ + + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT((object->type == OBJT_MGTDEVICE && + (m->oflags & VPO_UNMANAGED) == 0), + ("Unmanaged device or page obj %p m %p", object, m)); + pmap_remove_all(m); + (void)vm_page_remove(m); } static void diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h index 7d6b2e96b38c..d30bf349e411 100644 --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -300,6 +300,7 @@ vm_object_t cdev_pager_allocate(void *handle, enum obj_type tp, vm_ooffset_t foff, struct ucred *cred); vm_object_t cdev_pager_lookup(void *handle); void cdev_pager_free_page(vm_object_t object, vm_page_t m); +void cdev_mgtdev_pager_free_page(vm_object_t object, vm_page_t m); struct phys_pager_ops { int (*phys_pg_getpages)(vm_object_t vm_obj, vm_page_t *m, int count, diff --git 
a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c index 9401892aa9d6..636534173715 100644 --- a/sys/x86/iommu/intel_drv.c +++ b/sys/x86/iommu/intel_drv.c @@ -159,8 +159,7 @@ dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg) int dmar_rmrr_enable = 1; -static int dmar_enable = 1; - +static int dmar_enable = 0; static void dmar_identify(driver_t *driver, device_t parent) { @@ -423,6 +422,7 @@ dmar_attach(device_t dev) &unit->reg_rid, RF_ACTIVE); if (unit->regs == NULL) { device_printf(dev, "cannot allocate register window\n"); + dmar_devs[unit->iommu.unit] = NULL; return (ENOMEM); } unit->hw_ver = dmar_read4(unit, DMAR_VER_REG); @@ -450,6 +450,7 @@ dmar_attach(device_t dev) error = dmar_alloc_irq(dev, unit, DMAR_INTR_FAULT); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } if (DMAR_HAS_QI(unit)) { @@ -464,6 +465,7 @@ dmar_attach(device_t dev) error = dmar_alloc_irq(dev, unit, DMAR_INTR_QI); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } } @@ -497,12 +499,14 @@ dmar_attach(device_t dev) if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } error = dmar_inv_ctx_glob(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) { @@ -510,6 +514,7 @@ dmar_attach(device_t dev) if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } } @@ -518,16 +523,19 @@ dmar_attach(device_t dev) error = dmar_init_fault_log(unit); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } error = dmar_init_qi(unit); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } error = dmar_init_irt(unit); if 
(error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } @@ -543,6 +551,7 @@ dmar_attach(device_t dev) error = iommu_init_busdma(&unit->iommu); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } @@ -552,6 +561,7 @@ dmar_attach(device_t dev) if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } DMAR_UNLOCK(unit); diff --git a/tests/sys/netpfil/pf/Makefile b/tests/sys/netpfil/pf/Makefile index 4a16642a967b..2b3cb9fbd858 100644 --- a/tests/sys/netpfil/pf/Makefile +++ b/tests/sys/netpfil/pf/Makefile @@ -73,6 +73,7 @@ ${PACKAGE}FILES+= CVE-2019-5597.py \ pfsync_defer.py \ pft_ether.py \ pft_read_ipfix.py \ + rdr-srcport.py \ utils.subr ${PACKAGE}FILESMODE_CVE-2019-5597.py= 0555 diff --git a/tests/sys/netpfil/pf/mbuf.sh b/tests/sys/netpfil/pf/mbuf.sh index 082de08b0838..a4664718093a 100644 --- a/tests/sys/netpfil/pf/mbuf.sh +++ b/tests/sys/netpfil/pf/mbuf.sh @@ -91,7 +91,141 @@ inet_in_mbuf_len_cleanup() pft_cleanup } +atf_test_case "inet6_in_mbuf_len" "cleanup" +inet6_in_mbuf_len_head() +{ + atf_set descr 'Test that pf can handle inbound with the first mbuf with m_len < sizeof(struct ip6_hdr)' + atf_set require.user root +} +inet6_in_mbuf_len_body() +{ + pft_init + dummymbuf_init + + epair=$(vnet_mkepair) + ifconfig ${epair}a inet6 2001:db8::1/64 up no_dad + + # Set up a simple jail with one interface + vnet_mkjail alcatraz ${epair}b + jexec alcatraz ifconfig ${epair}b inet6 2001:db8::2/64 up no_dad + + # Sanity check + atf_check -s exit:0 -o ignore ping -c1 2001:db8::2 + + # Should be denied + jexec alcatraz pfctl -e + pft_set_rules alcatraz \ + "block" \ + "pass quick inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv }" + atf_check -s not-exit:0 -o ignore ping -c1 -t1 2001:db8::2 + + # Should be allowed by from/to addresses + pft_set_rules alcatraz \ + "block" \ + "pass quick inet6 
proto icmp6 icmp6-type { neighbrsol, neighbradv }" \ + "pass in inet6 from 2001:db8::1 to 2001:db8::2" + atf_check -s exit:0 -o ignore ping -c1 2001:db8::2 + + # Should still work for m_len=0 + jexec alcatraz pfilctl link -i dummymbuf:inet6 inet6 + jexec alcatraz sysctl net.dummymbuf.rules="inet6 in ${epair}b pull-head 0;" + atf_check_equal "0" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check -s exit:0 -o ignore ping -c1 2001:db8::2 + atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + + # m_len=1 + jexec alcatraz sysctl net.dummymbuf.rules="inet6 in ${epair}b pull-head 1;" + jexec alcatraz sysctl net.dummymbuf.hits=0 + atf_check -s exit:0 -o ignore ping -c1 2001:db8::2 + atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + + # m_len=39 + # provided IPv6 basic header is 40 bytes long, it should impact the dst addr + jexec alcatraz sysctl net.dummymbuf.rules="inet6 in ${epair}b pull-head 39;" + jexec alcatraz sysctl net.dummymbuf.hits=0 + atf_check -s exit:0 -o ignore ping -c1 2001:db8::2 + atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" +} +inet6_in_mbuf_len_cleanup() +{ + pft_cleanup +} + +atf_test_case "ethernet_in_mbuf_len" "cleanup" +ethernet_in_mbuf_len_head() +{ + atf_set descr 'Test that pf can handle inbound with the first mbuf with m_len < sizeof(struct ether_header)' + atf_set require.user root +} +ethernet_in_mbuf_len_body() +{ + pft_init + dummymbuf_init + + epair=$(vnet_mkepair) + epair_a_mac=$(ifconfig ${epair}a ether | awk '/ether/ { print $2; }') + ifconfig ${epair}a 192.0.2.1/24 up + + # Set up a simple jail with one interface + vnet_mkjail alcatraz ${epair}b + jexec alcatraz ifconfig ${epair}b 192.0.2.2/24 up + epair_b_mac=$(jexec alcatraz ifconfig ${epair}b ether | awk '/ether/ { print $2; }') + + # Sanity check + atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 + + # Should be denied + jexec alcatraz pfctl -e + pft_set_rules alcatraz \ + "ether block" \ + "pass" + atf_check -s 
not-exit:0 -o ignore ping -c1 -t1 192.0.2.2 + + # Should be allowed by from/to addresses + echo $epair_a_mac + echo $epair_b_mac + pft_set_rules alcatraz \ + "ether block" \ + "ether pass in from ${epair_a_mac} to ${epair_b_mac}" \ + "ether pass out from ${epair_b_mac} to ${epair_a_mac}" \ + "pass" + atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 + + # Should still work for m_len=0 + jexec alcatraz pfilctl link -i dummymbuf:ethernet ethernet + jexec alcatraz sysctl net.dummymbuf.rules="ethernet in ${epair}b pull-head 0;" + atf_check_equal "0" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 + atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + + # m_len=1 + jexec alcatraz sysctl net.dummymbuf.rules="ethernet in ${epair}b pull-head 1;" + jexec alcatraz sysctl net.dummymbuf.hits=0 + atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 + atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + + # m_len=11 + # for the simplest L2 Ethernet frame it should impact src field + jexec alcatraz sysctl net.dummymbuf.rules="ethernet in ${epair}b pull-head 11;" + jexec alcatraz sysctl net.dummymbuf.hits=0 + atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 + atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + + # m_len=13 + # provided L2 Ethernet simplest header is 14 bytes long, it should impact ethertype field + jexec alcatraz sysctl net.dummymbuf.rules="ethernet in ${epair}b pull-head 13;" + jexec alcatraz sysctl net.dummymbuf.hits=0 + atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 + atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" +} +ethernet_in_mbuf_len_cleanup() +{ + pft_cleanup +} + atf_init_test_cases() { atf_add_test_case "inet_in_mbuf_len" + atf_add_test_case "inet6_in_mbuf_len" + atf_add_test_case "ethernet_in_mbuf_len" } diff --git a/tests/sys/netpfil/pf/rdr-srcport.py b/tests/sys/netpfil/pf/rdr-srcport.py new file mode 100644 index 
000000000000..633580582711 --- /dev/null +++ b/tests/sys/netpfil/pf/rdr-srcport.py @@ -0,0 +1,20 @@ +# +# A helper script which accepts TCP connections and writes the remote port +# number to the stream. +# + +import socket + +def main(): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + s.bind(('0.0.0.0', 8888)) + s.listen(5) + + while True: + cs, addr = s.accept() + cs.sendall(str(addr[1]).encode()) + cs.close() + +if __name__ == '__main__': + main() diff --git a/tests/sys/netpfil/pf/rdr.sh b/tests/sys/netpfil/pf/rdr.sh index b7ec80b4d85e..135bfd42c1f4 100644 --- a/tests/sys/netpfil/pf/rdr.sh +++ b/tests/sys/netpfil/pf/rdr.sh @@ -121,7 +121,107 @@ tcp_v6_cleanup() pft_cleanup } + +atf_test_case "srcport" "cleanup" +srcport_head() +{ + atf_set descr 'TCP rdr srcport modulation' + atf_set require.user root + atf_set require.progs python3 + atf_set timeout 9999 +} + +# +# Test that rdr works for multiple TCP with same srcip and srcport. +# +# Four jails, a, b, c, d, are used: +# - jail d runs a server on port 8888, +# - jail a makes connections to the server, routed through jails b and c, +# - jail b uses NAT to rewrite source addresses and ports to the same 2-tuple, +# avoiding the need to use SO_REUSEADDR in jail a, +# - jail c uses a redirect rule to map the destination address to the same +# address and port, resulting in a NAT state conflict. +# +# In this case, the rdr rule should also rewrite the source port (again) to +# resolve the state conflict. 
+# +srcport_body() +{ + pft_init + + j="rdr:srcport" + epair1=$(vnet_mkepair) + epair2=$(vnet_mkepair) + epair3=$(vnet_mkepair) + + echo $epair_one + echo $epair_two + + vnet_mkjail ${j}a ${epair1}a + vnet_mkjail ${j}b ${epair1}b ${epair2}a + vnet_mkjail ${j}c ${epair2}b ${epair3}a + vnet_mkjail ${j}d ${epair3}b + + # configure addresses for a + jexec ${j}a ifconfig lo0 up + jexec ${j}a ifconfig ${epair1}a inet 198.51.100.50/24 up + jexec ${j}a ifconfig ${epair1}a inet alias 198.51.100.51/24 + jexec ${j}a ifconfig ${epair1}a inet alias 198.51.100.52/24 + + # configure addresses for b + jexec ${j}b ifconfig lo0 up + jexec ${j}b ifconfig ${epair1}b inet 198.51.100.1/24 up + jexec ${j}b ifconfig ${epair2}a inet 198.51.101.2/24 up + + # configure addresses for c + jexec ${j}c ifconfig lo0 up + jexec ${j}c ifconfig ${epair2}b inet 198.51.101.3/24 up + jexec ${j}c ifconfig ${epair2}b inet alias 198.51.101.4/24 + jexec ${j}c ifconfig ${epair2}b inet alias 198.51.101.5/24 + jexec ${j}c ifconfig ${epair3}a inet 203.0.113.1/24 up + + # configure addresses for d + jexec ${j}d ifconfig lo0 up + jexec ${j}d ifconfig ${epair3}b inet 203.0.113.50/24 up + + jexec ${j}b sysctl net.inet.ip.forwarding=1 + jexec ${j}c sysctl net.inet.ip.forwarding=1 + jexec ${j}b pfctl -e + jexec ${j}c pfctl -e + + pft_set_rules ${j}b \ + "set debug misc" \ + "nat on ${epair2}a inet from 198.51.100.0/24 to any -> ${epair2}a static-port" + + pft_set_rules ${j}c \ + "set debug misc" \ + "rdr on ${epair2}b proto tcp from any to ${epair2}b port 7777 -> 203.0.113.50 port 8888" + + jexec ${j}a route add default 198.51.100.1 + jexec ${j}c route add 198.51.100.0/24 198.51.101.2 + jexec ${j}d route add 198.51.101.0/24 203.0.113.1 + + jexec ${j}d python3 $(atf_get_srcdir)/rdr-srcport.py & + sleep 1 + + echo a | jexec ${j}a nc -w 3 -s 198.51.100.50 -p 1234 198.51.101.3 7777 > port1 + + jexec ${j}a nc -s 198.51.100.51 -p 1234 198.51.101.4 7777 > port2 & + jexec ${j}a nc -s 198.51.100.52 -p 1234 198.51.101.5 7777 
> port3 & + sleep 1 + + atf_check -o inline:"1234" cat port1 + atf_check -o match:"[0-9]+" -o not-inline:"1234" cat port2 + atf_check -o match:"[0-9]+" -o not-inline:"1234" cat port3 +} + +srcport_cleanup() +{ + pft_cleanup +} + atf_init_test_cases() { atf_add_test_case "tcp_v6" + atf_add_test_case "srcport" } diff --git a/tests/sys/netpfil/pf/set_skip.sh b/tests/sys/netpfil/pf/set_skip.sh index e5b1440360e9..e984377721b8 100644 --- a/tests/sys/netpfil/pf/set_skip.sh +++ b/tests/sys/netpfil/pf/set_skip.sh @@ -26,6 +26,50 @@ . $(atf_get_srcdir)/utils.subr +atf_test_case "unset" "cleanup" +unset_head() +{ + atf_set descr 'Unset set skip test' + atf_set require.user root +} + +unset_body() +{ + pft_init + + vnet_mkjail alcatraz + jexec alcatraz ifconfig lo0 127.0.0.1/8 up + jexec alcatraz pfctl -e + pft_set_rules alcatraz "set skip on lo0" \ + "block in proto icmp" + + echo "set skip" + jexec alcatraz pfctl -v -sI + + jexec alcatraz ifconfig + atf_check -s exit:0 -o ignore jexec alcatraz ping -c 1 127.0.0.1 + + # Unset the skip on the group + pft_set_rules noflush alcatraz \ + "block in proto icmp" + + echo "No setskip" + jexec alcatraz pfctl -v -sI + + # Do flush states + jexec alcatraz pfctl -Fs + + # And now our ping is blocked + atf_check -s exit:2 -o ignore jexec alcatraz ping -c 1 127.0.0.1 + + jexec alcatraz pfctl -v -sI +} + +unset_cleanup() +{ + pft_cleanup +} + atf_test_case "set_skip_group" "cleanup" set_skip_group_head() { @@ -45,8 +89,24 @@ set_skip_group_body() pft_set_rules alcatraz "set skip on foo" \ "block in proto icmp" + echo "set skip" + jexec alcatraz pfctl -v -sI + jexec alcatraz ifconfig atf_check -s exit:0 -o ignore jexec alcatraz ping -c 1 127.0.0.1 + + # Unset the skip on the group + pft_set_rules noflush alcatraz \ + "block in proto icmp" + + # Do flush states + jexec alcatraz pfctl -Fs + + # And now our ping is blocked + atf_check -s exit:2 -o ignore jexec alcatraz ping -c 1 127.0.0.1 + + echo "No setskip" + jexec alcatraz pfctl -v -sI } 
set_skip_group_cleanup() @@ -163,6 +223,7 @@ pr255852_cleanup() atf_init_test_cases() { + atf_add_test_case "unset" atf_add_test_case "set_skip_group" atf_add_test_case "set_skip_group_lo" atf_add_test_case "set_skip_dynamic" diff --git a/tools/build/options/WITH_UNDEFINED_VERSION b/tools/build/options/WITH_UNDEFINED_VERSION index 71b048349a6f..99f687d37bcf 100644 --- a/tools/build/options/WITH_UNDEFINED_VERSION +++ b/tools/build/options/WITH_UNDEFINED_VERSION @@ -1,4 +1,4 @@ Link libraries with --undefined-version which permits version maps to contain symbols that are not present in the library. -If this is necessicary to build a particular configuration, a bug is +If this is necessary to build a particular configuration, a bug is present and the configuration should be reported. diff --git a/tools/test/stress2/misc/buildkernel.sh b/tools/test/stress2/misc/buildkernel.sh index 849a09b81439..e0aa85617f9b 100755 --- a/tools/test/stress2/misc/buildkernel.sh +++ b/tools/test/stress2/misc/buildkernel.sh @@ -49,6 +49,7 @@ chmod 0777 $TMPDIR log=$mntpoint/log p=$((`sysctl -n hw.ncpu`+ 1)) +[ $p -gt 32 ] && p=32 # Arbitrary cap p=`jot -r 1 1 $p` echo "make -j $p buildkernel KERNCONF=GENERIC DESTDIR=$mntpoint" \ "TARGET=amd64 TARGET_ARCH=amd64" diff --git a/tools/test/stress2/misc/buildworld.sh b/tools/test/stress2/misc/buildworld.sh index 595b387c90ae..3b362ec7041a 100755 --- a/tools/test/stress2/misc/buildworld.sh +++ b/tools/test/stress2/misc/buildworld.sh @@ -55,6 +55,7 @@ mkdir $TMPDIR chmod 0777 $TMPDIR p=$((`sysctl -n hw.ncpu`+ 1)) +[ $p -gt 32 ] && p=32 # Arbitrary cap timeout 20m make -i -j $p buildworld DESTDIR=$mntpoint TARGET=amd64 \ TARGET_ARCH=amd64 > /dev/null diff --git a/tools/test/stress2/misc/buildworld2.sh b/tools/test/stress2/misc/buildworld2.sh index 9c1eed97b7ea..3653cb1db5b4 100755 --- a/tools/test/stress2/misc/buildworld2.sh +++ b/tools/test/stress2/misc/buildworld2.sh @@ -46,6 +46,7 @@ mkdir $TMPDIR chmod 0777 $TMPDIR p=$((`sysctl -n hw.ncpu`+ 1)) 
+[ $p -gt 32 ] && p=32 # Arbitrary cap make -j $p buildworld DESTDIR=$mntpoint TARGET=amd64 TARGET_ARCH=amd64 \ > /dev/null & sleep $((20 * 60)) diff --git a/tools/test/stress2/misc/buildworld3.sh b/tools/test/stress2/misc/buildworld3.sh index 0c660cae8eae..e3bce2764c0c 100755 --- a/tools/test/stress2/misc/buildworld3.sh +++ b/tools/test/stress2/misc/buildworld3.sh @@ -62,6 +62,7 @@ mkdir $TMPDIR $MAKEOBJDIRPREFIX chmod 0777 $TMPDIR $MAKEOBJDIRPREFIX p=$((`sysctl -n hw.ncpu`+ 1)) +[ $p -gt 32 ] && p=32 # Arbitrary cap su $testuser -c \ "make -i -j $p buildworld DESTDIR=$mntpoint TARGET=amd64 \ TARGET_ARCH=amd64 > /dev/null" & diff --git a/tools/test/stress2/misc/buildworld4.sh b/tools/test/stress2/misc/buildworld4.sh index 6c15a72a9dcb..d1d162120952 100755 --- a/tools/test/stress2/misc/buildworld4.sh +++ b/tools/test/stress2/misc/buildworld4.sh @@ -50,6 +50,7 @@ mkdir $TMPDIR chmod 0777 $TMPDIR p=$((`sysctl -n hw.ncpu`+ 1)) +[ $p -gt 16 ] && p=16 # Arbitrary cap [ `sysctl -n vm.swap_total` -gt 0 ] && p=$((p * 4)) p=`jot -r 1 1 $p` echo "make -i -j $p buildworld DESTDIR=$mntpoint TARGET=amd64 "\ diff --git a/tools/test/stress2/misc/crossmp3.sh b/tools/test/stress2/misc/crossmp3.sh index 5eecb936e900..32c625a1e4ad 100755 --- a/tools/test/stress2/misc/crossmp3.sh +++ b/tools/test/stress2/misc/crossmp3.sh @@ -41,6 +41,7 @@ CONT=/tmp/crossmp3.continue if [ $# -eq 0 ]; then N=`sysctl -n hw.ncpu` + [ $N -gt 32 ] && N=32 # Arbitrary cap usermem=`sysctl -n hw.usermem` [ `sysctl -n vm.swap_total` -eq 0 ] && usermem=$((usermem / 2)) size=$((usermem / 1024 / 1024 / N)) diff --git a/tools/test/stress2/misc/crossmp4.sh b/tools/test/stress2/misc/crossmp4.sh index e22f969b72bb..21d22bee69e5 100755 --- a/tools/test/stress2/misc/crossmp4.sh +++ b/tools/test/stress2/misc/crossmp4.sh @@ -40,6 +40,7 @@ . 
../default.cfg N=`sysctl -n hw.ncpu` +[ $N -gt 32 ] && N=32 # Arbitrary cap usermem=`sysctl -n hw.usermem` [ `swapinfo | wc -l` -eq 1 ] && usermem=$((usermem/100*80)) size=$((usermem / 1024 / 1024 - 2)) diff --git a/tools/test/stress2/misc/crossmp5.sh b/tools/test/stress2/misc/crossmp5.sh index 038dea7ebe4f..6e504d9f20ad 100755 --- a/tools/test/stress2/misc/crossmp5.sh +++ b/tools/test/stress2/misc/crossmp5.sh @@ -33,6 +33,7 @@ . ../default.cfg N=`sysctl -n hw.ncpu` +[ $N -gt 32 ] && N=32 # Arbitrary cap usermem=`sysctl -n hw.usermem` [ `swapinfo | wc -l` -eq 1 ] && usermem=$((usermem/100*80)) size=$((usermem / 1024 / 1024 / N)) diff --git a/tools/test/stress2/misc/crossmp8.sh b/tools/test/stress2/misc/crossmp8.sh index e877dfaf6d1c..eec5ba9bc7c1 100755 --- a/tools/test/stress2/misc/crossmp8.sh +++ b/tools/test/stress2/misc/crossmp8.sh @@ -41,6 +41,7 @@ CONT=/tmp/crossmp8.continue N=`sysctl -n hw.ncpu` +[ $N -gt 32 ] && N=32 # Arbitrary cap usermem=`sysctl -n hw.usermem` [ `swapinfo | wc -l` -eq 1 ] && usermem=$((usermem/100*80)) size=$((usermem / 1024 / 1024 / N)) diff --git a/tools/test/stress2/misc/gnop4.sh b/tools/test/stress2/misc/gnop4.sh index f938dd3b790b..1b4da74266f6 100755 --- a/tools/test/stress2/misc/gnop4.sh +++ b/tools/test/stress2/misc/gnop4.sh @@ -34,6 +34,8 @@ # https://people.freebsd.org/~pho/stress/log/kostik1017.txt # Fixed by r322175 +# Seen with p=513: Threads stuck in "ffsrca" + . ../default.cfg gigs=9 @@ -62,6 +64,7 @@ cd $mntpoint/src export MAKEOBJDIRPREFIX=$mntpoint/obj p=$((`sysctl -n hw.ncpu`+ 1)) +[ $p -gt 32 ] && p=32 # Temporary work around timeout 10m \ make -i -j $p buildworld DESTDIR=$mntpoint TARGET=amd64 \ TARGET_ARCH=amd64 > /dev/null diff --git a/tools/test/stress2/misc/tmpfs13.sh b/tools/test/stress2/misc/tmpfs13.sh index 29b44cbc9ad4..231c42033f9d 100755 --- a/tools/test/stress2/misc/tmpfs13.sh +++ b/tools/test/stress2/misc/tmpfs13.sh @@ -40,6 +40,7 @@ . 
../default.cfg N=`sysctl -n hw.ncpu` +[ $N -gt 32 ] && N=32 # Arbitrary cap usermem=`sysctl -n hw.usermem` [ `swapinfo | wc -l` -eq 1 ] && usermem=$((usermem/100*80)) size=$((usermem / 1024 / 1024 / 2)) diff --git a/tools/test/stress2/misc/zzbuildworld.sh b/tools/test/stress2/misc/zzbuildworld.sh index 2104eb156c86..e1bf867d8d5f 100755 --- a/tools/test/stress2/misc/zzbuildworld.sh +++ b/tools/test/stress2/misc/zzbuildworld.sh @@ -44,6 +44,7 @@ top=$mntpoint export MAKEOBJDIRPREFIX=$top/obj export log=$top/buildworld.`date +%Y%m%dT%H%M` n=$((`sysctl -n hw.ncpu` + 1)) +[ $n -gt 32 ] && n=32 # Arbitrary cap cd $src make -j$n buildworld > $log 2>&1 && s=0 ||s=1 grep '\*\*\*' $log && s=2 diff --git a/usr.bin/tail/forward.c b/usr.bin/tail/forward.c index a5303385a74f..6d9db94a827f 100644 --- a/usr.bin/tail/forward.c +++ b/usr.bin/tail/forward.c @@ -379,7 +379,8 @@ follow(file_info_t *files, enum STYLE style, off_t off) sb2.st_dev != file->st.st_dev || sb2.st_nlink == 0) { show(file); - fclose(file->fp); + if (file->fp != NULL) + fclose(file->fp); file->fp = ftmp; memcpy(&file->st, &sb2, sizeof(struct stat)); diff --git a/usr.sbin/adduser/adduser.conf.5 b/usr.sbin/adduser/adduser.conf.5 index 09b80f2df021..9663926ee341 100644 --- a/usr.sbin/adduser/adduser.conf.5 +++ b/usr.sbin/adduser/adduser.conf.5 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd April 12, 2007 +.Dd August 18, 2024 .Dt ADDUSER.CONF 5 .Os .Sh NAME @@ -171,8 +171,12 @@ The default information to be held in the GECOS field of .It Va uidstart The default user ID setting. This must be a number above 1000 and fewer than 65534. -.It Va Zflag -Do not attempt to create ZFS home dataset. +.It Va Zcreate +Set to +.Dq no +to prevent the creation of a ZFS home dataset if +.Va homeprefix +is a ZFS mountpoint. 
.El .Sh EXAMPLES The following is an example diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 index 527ccf720540..8001b5276d51 100644 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -1166,7 +1166,7 @@ for a virtual machine, use .Fl o .Ar config.dump=1 : .Bd -literal -offset indent -/usr/sbin/bhyve -c 2 -m 256 -A -H -P \\ +/usr/sbin/bhyve -c 2 -m 256 -H -P \\ -s 0:0,hostbridge -s 1:0,virtio-net,tap0 \\ -s 2:0,ahci-hd,./vm0.img \\ -s 31,lpc -l com1,stdio \\