diff --git a/Documentation/conf.py b/Documentation/conf.py index d148f3e8dd572..0c2205d536b38 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -75,6 +75,8 @@ def have_command(cmd): "__rcu", "__user", "__force", + "__counted_by_le", + "__counted_by_be", # include/linux/compiler_attributes.h: "__alias", diff --git a/Documentation/devicetree/bindings/net/airoha,en8811h.yaml b/Documentation/devicetree/bindings/net/airoha,en8811h.yaml new file mode 100644 index 0000000000000..ecb5149ec6b0d --- /dev/null +++ b/Documentation/devicetree/bindings/net/airoha,en8811h.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/airoha,en8811h.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Airoha EN8811H PHY + +maintainers: + - Eric Woudstra + +description: + The Airoha EN8811H PHY can reverse the polarity + of the lines to and/or from the MAC. Polarity is reversed by + boolean properties in the devicetree node of the PHY. + +allOf: + - $ref: ethernet-phy.yaml# + +properties: + compatible: + enum: + - ethernet-phy-id03a2.a411 + + reg: + maxItems: 1 + + airoha,pnswap-rx: + type: boolean + description: + Reverse rx polarity of the SERDES. This is the receiving + side of the lines from the MAC towards the EN8811H. + + airoha,pnswap-tx: + type: boolean + description: + Reverse tx polarity of the SERDES. This is the transmitting + side of the lines from the EN8811H towards the MAC. + +required: + - reg + +unevaluatedProperties: false + +examples: + - | + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-phy@1 { + compatible = "ethernet-phy-id03a2.a411"; + reg = <1>; + airoha,pnswap-rx; + }; + }; diff --git a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml index 528ef3572b621..055a3351880bc 100644 --- a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml +++ b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml @@ -94,6 +94,10 @@ properties: local-bd-address: true + qcom,local-bd-address-broken: + type: boolean + description: + boot firmware is incorrectly passing the address in big-endian order required: - compatible diff --git a/Documentation/mm/page_frags.rst b/Documentation/mm/page_frags.rst index a81617e688a84..503ca6cdb8040 100644 --- a/Documentation/mm/page_frags.rst +++ b/Documentation/mm/page_frags.rst @@ -25,7 +25,7 @@ to be disabled when executing the fragment allocation. The network stack uses two separate caches per CPU to handle fragment allocation. The netdev_alloc_cache is used by callers making use of the netdev_alloc_frag and __netdev_alloc_skb calls. The napi_alloc_cache is -used by callers of the __napi_alloc_frag and __napi_alloc_skb calls. The +used by callers of the __napi_alloc_frag and napi_alloc_skb calls. The main difference between these two calls is the context in which they may be called.
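(Aside, not part of the patch: a minimal sketch of the two call sites governed by the page_frags hunk above. The driver callbacks and the 128-byte length are invented for illustration; the context rules are the ones this paragraph spells out.)

```c
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* NAPI poll callback: runs in softirq context, so napi_alloc_skb()
 * may use napi_alloc_cache without disabling interrupts.
 */
static int example_poll(struct napi_struct *napi, int budget)
{
	struct sk_buff *skb = napi_alloc_skb(napi, 128);

	if (skb)
		dev_kfree_skb(skb);	/* stand-in for napi_gro_receive() */
	return 0;
}

/* Any other context (e.g. a hard interrupt handler): use the "netdev"
 * variant, which briefly disables interrupts to protect
 * netdev_alloc_cache.
 */
static struct sk_buff *example_any_context(struct net_device *dev)
{
	return netdev_alloc_skb(dev, 128);
}
```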
The "netdev" prefixed functions are usable in any context as these functions will disable interrupts, while the "napi" prefixed functions are diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml index 324fa182cd149..6068c105c5ee2 100644 --- a/Documentation/netlink/specs/tc.yaml +++ b/Documentation/netlink/specs/tc.yaml @@ -1099,6 +1099,19 @@ definitions: - name: offmask type: s32 + - + name: tc-u32-mark + type: struct + members: + - + name: val + type: u32 + - + name: mask + type: u32 + - + name: success + type: u32 - name: tc-u32-sel type: struct @@ -1774,6 +1787,44 @@ attribute-sets: - name: key-ex type: binary + - + name: tc-act-police-attrs + attributes: + - + name: tbf + type: binary + struct: tc-police + - + name: rate + type: binary # TODO + - + name: peakrate + type: binary # TODO + - + name: avrate + type: u32 + - + name: result + type: u32 + - + name: tm + type: binary + struct: tcf-t + - + name: pad + type: pad + - + name: rate64 + type: u64 + - + name: peakrate64 + type: u64 + - + name: pktrate64 + type: u64 + - + name: pktburst64 + type: u64 - name: tc-act-simple-attrs attributes: diff --git a/Documentation/netlink/specs/team.yaml b/Documentation/netlink/specs/team.yaml new file mode 100644 index 0000000000000..c13529e011c90 --- /dev/null +++ b/Documentation/netlink/specs/team.yaml @@ -0,0 +1,204 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: team + +protocol: genetlink-legacy + +doc: | + Network team device driver. + +c-family-name: team-genl-name +c-version-name: team-genl-version +kernel-policy: global +uapi-header: linux/if_team.h + +definitions: + - + name: string-max-len + type: const + value: 32 + - + name: genl-change-event-mc-grp-name + type: const + value: change_event + +attribute-sets: + - + name: team + doc: + The team nested layout of get/set msg looks like + [TEAM_ATTR_LIST_OPTION] + [TEAM_ATTR_ITEM_OPTION] + [TEAM_ATTR_OPTION_*], ... + [TEAM_ATTR_ITEM_OPTION] + [TEAM_ATTR_OPTION_*], ... + ... + [TEAM_ATTR_LIST_PORT] + [TEAM_ATTR_ITEM_PORT] + [TEAM_ATTR_PORT_*], ... + [TEAM_ATTR_ITEM_PORT] + [TEAM_ATTR_PORT_*], ... + ... 
+ name-prefix: team-attr- attributes: - name: unspec type: unused value: 0 - name: team-ifindex type: u32 - name: list-option type: nest nested-attributes: item-option - name: list-port type: nest nested-attributes: item-port - name: item-option name-prefix: team-attr-item- attr-cnt-name: __team-attr-item-option-max attr-max-name: team-attr-item-option-max attributes: - name: option-unspec type: unused value: 0 - name: option type: nest nested-attributes: attr-option - name: attr-option name-prefix: team-attr-option- attributes: - name: unspec type: unused value: 0 - name: name type: string checks: max-len: string-max-len unterminated-ok: true - name: changed type: flag - name: type type: u8 - name: data type: binary - name: removed type: flag - name: port-ifindex type: u32 doc: for per-port options - name: array-index type: u32 doc: for array options - name: item-port name-prefix: team-attr-item- attr-cnt-name: __team-attr-item-port-max attr-max-name: team-attr-item-port-max attributes: - name: port-unspec type: unused value: 0 - name: port type: nest nested-attributes: attr-port - name: attr-port name-prefix: team-attr-port- attributes: - name: unspec type: unused value: 0 - name: ifindex type: u32 - name: changed type: flag - name: linkup type: flag - name: speed type: u32 - name: duplex type: u8 - name: removed type: flag +operations: list: - name: noop doc: No operation value: 0 attribute-set: team dont-validate: [ strict ] do: # Actually it only replies with the team netlink family reply: attributes: - team-ifindex - name: options-set doc: Set team options attribute-set: team dont-validate: [ strict ] flags: [ admin-perm ] do: request: &option_attrs attributes: - team-ifindex - list-option reply: *option_attrs - name: options-get doc: Get team options info attribute-set: team dont-validate: [ strict ] flags: [ admin-perm ] do: request: attributes: - team-ifindex reply: *option_attrs - name: port-list-get doc: Get team ports info attribute-set: team dont-validate: [ strict ] flags: [ admin-perm ] do: request: attributes: - team-ifindex reply: &port_attrs attributes: - team-ifindex - list-port diff --git a/Documentation/networking/devlink/devlink-eswitch-attr.rst b/Documentation/networking/devlink/devlink-eswitch-attr.rst new file mode 100644 index 0000000000000..08bb39ab15286 --- /dev/null +++ b/Documentation/networking/devlink/devlink-eswitch-attr.rst @@ -0,0 +1,76 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================== +Devlink E-Switch Attribute +========================== + +Devlink E-Switch supports two modes of operation: legacy and switchdev. +Legacy mode operates based on traditional MAC/VLAN steering rules. Switching +decisions are made based on MAC addresses, VLANs, etc. There is limited ability +to offload switching rules to hardware. + +On the other hand, switchdev mode allows for more advanced offloading +capabilities of the E-Switch to hardware. In switchdev mode, more switching +rules and logic can be offloaded to the hardware switch ASIC. It enables +representor netdevices that represent the slow path of virtual functions (VFs) +or scalable-functions (SFs) of the device.
See more information about +:ref:`Documentation/networking/switchdev.rst <switchdev>` and +:ref:`Documentation/networking/representors.rst <representors>`. + +In addition, the devlink E-Switch also comes with other attributes listed +in the following section. + +Attributes Description +====================== + +The following is a list of E-Switch attributes. + +.. list-table:: E-Switch attributes + :widths: 8 5 45 + + * - Name + - Type + - Description + * - ``mode`` + - enum + - The mode of the device. The mode can be one of the following: + + * ``legacy`` operates based on traditional MAC/VLAN steering + rules. + * ``switchdev`` allows for more advanced offloading capabilities of + the E-Switch to hardware. + * - ``inline-mode`` + - enum + - Some HWs need the VF driver to put part of the packet + headers on the TX descriptor so the e-switch can do proper + matching and steering. Supported in both switchdev mode and legacy mode. + + * ``none`` none. + * ``link`` L2 mode. + * ``network`` L3 mode. + * ``transport`` L4 mode. + * - ``encap-mode`` + - enum + - The encapsulation mode of the device. Supported in both switchdev mode + and legacy mode. The mode can be one of the following: + + * ``none`` Disable encapsulation support. + * ``basic`` Enable encapsulation support. + +Example Usage +============= + +.. code:: shell + + # enable switchdev mode + $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev + + # set inline-mode and encap-mode + $ devlink dev eswitch set pci/0000:08:00.0 inline-mode none encap-mode basic + + # display devlink device eswitch attributes + $ devlink dev eswitch show pci/0000:08:00.0 + pci/0000:08:00.0: mode switchdev inline-mode none encap-mode basic + + # enable encap-mode with legacy mode + $ devlink dev eswitch set pci/0000:08:00.0 mode legacy inline-mode none encap-mode basic diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index e14d7a701b72b..948c8c44e233f 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -67,6 +67,7 @@ general. devlink-selftests devlink-trap devlink-linecard + devlink-eswitch-attr Driver-specific documentation ----------------------------- diff --git a/Documentation/networking/representors.rst b/Documentation/networking/representors.rst index decb39c19b9ed..5e23386f69687 100644 --- a/Documentation/networking/representors.rst +++ b/Documentation/networking/representors.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. 
_representors: ============================= Network Function Representors diff --git a/Documentation/translations/zh_CN/mm/page_frags.rst b/Documentation/translations/zh_CN/mm/page_frags.rst index 20bd3fafdc8c9..a5b22486a9134 100644 --- a/Documentation/translations/zh_CN/mm/page_frags.rst +++ b/Documentation/translations/zh_CN/mm/page_frags.rst @@ -25,7 +25,7 @@ sk_buff->head使用,或者用于skb_shared_info的 “frags” 部分。 网络堆栈在每个CPU使用两个独立的缓存来处理碎片分配。netdev_alloc_cache被使用 netdev_alloc_frag和__netdev_alloc_skb调用的调用者使用。napi_alloc_cache -被调用__napi_alloc_frag和__napi_alloc_skb的调用者使用。这两个调用的主要区别是 +被调用__napi_alloc_frag和napi_alloc_skb的调用者使用。这两个调用的主要区别是 它们可能被调用的环境。“netdev” 前缀的函数可以在任何上下文中使用,因为这些函数 将禁用中断,而 ”napi“ 前缀的函数只可以在softirq上下文中使用。 diff --git a/MAINTAINERS b/MAINTAINERS index 6a233e1a3cf2e..68cda9678e738 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21665,6 +21665,7 @@ TEAM DRIVER M: Jiri Pirko L: netdev@vger.kernel.org S: Supported +F: Documentation/netlink/specs/team.yaml F: drivers/net/team/ F: include/linux/if_team.h F: include/uapi/linux/if_team.h @@ -23661,7 +23662,6 @@ F: drivers/scsi/vmw_pvscsi.c F: drivers/scsi/vmw_pvscsi.h VMWARE VIRTUAL PTP CLOCK DRIVER -M: Jeff Sipek R: Ajay Kaher R: Alexey Makhalov R: VMware PV-Drivers Reviewers diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index f3a6da8b28901..5260c63db0078 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -944,6 +944,8 @@ ap_spi_fp: &spi10 { vddrf-supply = <&pp1300_l2c>; vddch0-supply = <&pp3300_l10c>; max-speed = <3200000>; + + qcom,local-bd-address-broken; }; }; diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 68cc9290cabe9..5bc6d0fa7498d 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -847,7 +847,7 @@ void af_alg_wmem_wakeup(struct sock *sk) wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLRDNORM | EPOLLRDBAND); - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(af_alg_wmem_wakeup); @@ -914,7 +914,7 @@ static void af_alg_data_wakeup(struct sock *sk) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLRDNORM | EPOLLRDBAND); - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); } diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index b40b32fa7f1c3..19cfc342fc7bb 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -826,11 +826,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) { + bdaddr_t bdaddr_swapped; struct sk_buff *skb; int err; - skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr, - HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + baswap(&bdaddr_swapped, bdaddr); + + skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, + &bdaddr_swapped, HCI_EV_VENDOR, + HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 8a60ad7acd705..ecbc52eaf1010 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,7 +7,6 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... 
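(Context for the BD-address changes, not part of the patch: the kernel stores `bdaddr_t` in little-endian byte order, while the QCA EDL write-BD-address command payload — and, per the new `qcom,local-bd-address-broken` property, the address some boot firmware leaves behind — uses the opposite order. Hence the `baswap()` added to `qca_set_bdaddr()` above and the quirk wired up in the hci_qca.c hunks below. A small illustrative sketch; the function name is invented:)

```c
/* Illustrative only: bdaddr_t keeps the address reversed relative to
 * its printed form, so 00:AA:01:23:45:67 is stored as the byte array
 * {0x67, 0x45, 0x23, 0x01, 0xaa, 0x00}. baswap() flips all six bytes,
 * yielding the big-endian layout the EDL command expects.
 */
#include <net/bluetooth/bluetooth.h>

static void example_edl_byte_order(void)
{
	bdaddr_t le = {{ 0x67, 0x45, 0x23, 0x01, 0xaa, 0x00 }};
	bdaddr_t be;

	baswap(&be, &le);	/* be.b[0] == 0x00 ... be.b[5] == 0x67 */
}
```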
@@ -226,6 +225,7 @@ struct qca_serdev { struct qca_power *bt_power; u32 init_speed; u32 oper_speed; + bool bdaddr_property_broken; const char *firmware_name; }; @@ -1843,6 +1843,7 @@ static int qca_setup(struct hci_uart *hu) const char *firmware_name = qca_get_firmware_name(hu); int ret; struct qca_btsoc_version ver; + struct qca_serdev *qcadev; const char *soc_name; ret = qca_check_speeds(hu); @@ -1904,16 +1905,11 @@ static int qca_setup(struct hci_uart *hu) case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - /* Set BDA quirk bit for reading BDA value from fwnode property - * only if that property exist in DT. - */ - if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); - } else { - bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); - } + qcadev = serdev_device_get_drvdata(hu->serdev); + if (qcadev->bdaddr_property_broken) + set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); hci_set_aosp_capable(hdev); @@ -2295,6 +2291,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->oper_speed) BT_DBG("UART will pick default operating speed"); + qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev, + "qcom,local-bd-address-broken"); + if (data) qcadev->btsoc_type = data->soc_type; else diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.c b/drivers/net/dsa/hirschmann/hellcreek_ptp.c index 5249a1c2a80b8..bfe21f9f7dcd3 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_ptp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.c @@ -27,7 +27,8 @@ void hellcreek_ptp_write(struct hellcreek *hellcreek, u16 data, } /* Get nanoseconds from PTP clock */ -static u64 hellcreek_ptp_clock_read(struct hellcreek *hellcreek) +static u64 hellcreek_ptp_clock_read(struct hellcreek *hellcreek, + struct ptp_system_timestamp *sts) { u16 nsl, nsh; @@ -45,16 +46,19 @@ static u64 hellcreek_ptp_clock_read(struct hellcreek *hellcreek) nsh = hellcreek_ptp_read(hellcreek, PR_SS_SYNC_DATA_C); nsh = hellcreek_ptp_read(hellcreek, PR_SS_SYNC_DATA_C); nsh = hellcreek_ptp_read(hellcreek, PR_SS_SYNC_DATA_C); + ptp_read_system_prets(sts); nsl = hellcreek_ptp_read(hellcreek, PR_SS_SYNC_DATA_C); + ptp_read_system_postts(sts); return (u64)nsl | ((u64)nsh << 16); } -static u64 __hellcreek_ptp_gettime(struct hellcreek *hellcreek) +static u64 __hellcreek_ptp_gettime(struct hellcreek *hellcreek, + struct ptp_system_timestamp *sts) { u64 ns; - ns = hellcreek_ptp_clock_read(hellcreek); + ns = hellcreek_ptp_clock_read(hellcreek, sts); if (ns < hellcreek->last_ts) hellcreek->seconds++; hellcreek->last_ts = ns; @@ -72,7 +76,7 @@ u64 hellcreek_ptp_gettime_seconds(struct hellcreek *hellcreek, u64 ns) { u64 s; - __hellcreek_ptp_gettime(hellcreek); + __hellcreek_ptp_gettime(hellcreek, NULL); if (hellcreek->last_ts > ns) s = hellcreek->seconds * NSEC_PER_SEC; else @@ -81,14 +85,15 @@ u64 hellcreek_ptp_gettime_seconds(struct hellcreek *hellcreek, u64 ns) return s; } -static int hellcreek_ptp_gettime(struct ptp_clock_info *ptp, - struct timespec64 *ts) +static int hellcreek_ptp_gettimex(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) { struct hellcreek *hellcreek = ptp_to_hellcreek(ptp); u64 ns; mutex_lock(&hellcreek->ptp_lock); - ns = __hellcreek_ptp_gettime(hellcreek); + ns = 
__hellcreek_ptp_gettime(hellcreek, sts); mutex_unlock(&hellcreek->ptp_lock); *ts = ns_to_timespec64(ns); @@ -184,7 +189,7 @@ static int hellcreek_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) if (abs(delta) > MAX_SLOW_OFFSET_ADJ) { struct timespec64 now, then = ns_to_timespec64(delta); - hellcreek_ptp_gettime(ptp, &now); + hellcreek_ptp_gettimex(ptp, &now, NULL); now = timespec64_add(now, then); hellcreek_ptp_settime(ptp, &now); @@ -233,7 +238,7 @@ static void hellcreek_ptp_overflow_check(struct work_struct *work) hellcreek = dw_overflow_to_hellcreek(dw); mutex_lock(&hellcreek->ptp_lock); - __hellcreek_ptp_gettime(hellcreek); + __hellcreek_ptp_gettime(hellcreek, NULL); mutex_unlock(&hellcreek->ptp_lock); schedule_delayed_work(&hellcreek->overflow_work, @@ -409,7 +414,7 @@ int hellcreek_ptp_setup(struct hellcreek *hellcreek) hellcreek->ptp_clock_info.pps = 0; hellcreek->ptp_clock_info.adjfine = hellcreek_ptp_adjfine; hellcreek->ptp_clock_info.adjtime = hellcreek_ptp_adjtime; - hellcreek->ptp_clock_info.gettime64 = hellcreek_ptp_gettime; + hellcreek->ptp_clock_info.gettimex64 = hellcreek_ptp_gettimex; hellcreek->ptp_clock_info.settime64 = hellcreek_ptp_settime; hellcreek->ptp_clock_info.enable = hellcreek_ptp_enable; hellcreek->ptp_clock_info.do_aux_work = hellcreek_hwtstamp_work; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 9ed1821184ece..c95787cb90867 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5503,8 +5503,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .family = MV88E6XXX_FAMILY_6250, .name = "Marvell 88E6020", .num_databases = 64, - .num_ports = 4, + /* Ports 2-4 are not routed to pins + * => usable ports 0, 1, 5, 6 + */ + .num_ports = 7, .num_internal_phys = 2, + .invalid_port_mask = BIT(2) | BIT(3) | BIT(4), .max_vid = 4095, .port_base_addr = 0x8, .phy_base_addr = 0x0, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 493b724848c8f..b4db4b1aaffbf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4241,6 +4241,7 @@ static void bnxt_init_vnics(struct bnxt *bp) int j; vnic->fw_vnic_id = INVALID_HW_RING_ID; + vnic->vnic_id = i; for (j = 0; j < BNXT_MAX_CTX_PER_VNIC; j++) vnic->fw_rss_cos_lb_ctx[j] = INVALID_HW_RING_ID; @@ -5788,8 +5789,22 @@ void bnxt_fill_ipv6_mask(__be32 mask[4]) static void bnxt_cfg_rfs_ring_tbl_idx(struct bnxt *bp, struct hwrm_cfa_ntuple_filter_alloc_input *req, - u16 rxq) + struct bnxt_ntuple_filter *fltr) { + struct bnxt_rss_ctx *rss_ctx, *tmp; + u16 rxq = fltr->base.rxq; + + if (fltr->base.flags & BNXT_ACT_RSS_CTX) { + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) { + if (rss_ctx->index == fltr->base.fw_vnic_id) { + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + req->dst_id = cpu_to_le16(vnic->fw_vnic_id); + break; + } + } + return; + } if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) { struct bnxt_vnic_info *vnic; u32 enables; @@ -5830,7 +5845,7 @@ int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, req->flags = cpu_to_le32(CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DROP); } else if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2) { - bnxt_cfg_rfs_ring_tbl_idx(bp, req, fltr->base.rxq); + bnxt_cfg_rfs_ring_tbl_idx(bp, req, fltr); } else { vnic = &bp->vnic_info[fltr->base.rxq + 1]; req->dst_id = cpu_to_le16(vnic->fw_vnic_id); @@ -5938,9 +5953,9 @@ static void bnxt_hwrm_vnic_update_tunl_tpa(struct bnxt *bp, req->tnl_tpa_en_bitmap = 
cpu_to_le32(tunl_tpa_bmap); } -static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags) +int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u32 tpa_flags) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX; struct hwrm_vnic_tpa_cfg_input *req; int rc; @@ -6025,9 +6040,10 @@ static u16 bnxt_cp_ring_for_tx(struct bnxt *bp, struct bnxt_tx_ring_info *txr) return bnxt_cp_ring_from_grp(bp, &txr->tx_ring_struct); } -static int bnxt_alloc_rss_indir_tbl(struct bnxt *bp) +int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx) { int entries; + u16 *tbl; if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) entries = BNXT_MAX_RSS_TABLE_ENTRIES_P5; @@ -6035,16 +6051,22 @@ static int bnxt_alloc_rss_indir_tbl(struct bnxt *bp) entries = HW_HASH_INDEX_SIZE; bp->rss_indir_tbl_entries = entries; - bp->rss_indir_tbl = kmalloc_array(entries, sizeof(*bp->rss_indir_tbl), - GFP_KERNEL); - if (!bp->rss_indir_tbl) + tbl = kmalloc_array(entries, sizeof(*bp->rss_indir_tbl), GFP_KERNEL); + if (!tbl) return -ENOMEM; + + if (rss_ctx) + rss_ctx->rss_indir_tbl = tbl; + else + bp->rss_indir_tbl = tbl; + return 0; } -static void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp) +void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx) { u16 max_rings, max_entries, pad, i; + u16 *rss_indir_tbl; if (!bp->rx_nr_rings) return; @@ -6055,13 +6077,17 @@ static void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp) max_rings = bp->rx_nr_rings; max_entries = bnxt_get_rxfh_indir_size(bp->dev); + if (rss_ctx) + rss_indir_tbl = &rss_ctx->rss_indir_tbl[0]; + else + rss_indir_tbl = &bp->rss_indir_tbl[0]; for (i = 0; i < max_entries; i++) - bp->rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, max_rings); + rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, max_rings); pad = bp->rss_indir_tbl_entries - max_entries; if (pad) - memset(&bp->rss_indir_tbl[i], 0, pad * sizeof(u16)); + memset(&rss_indir_tbl[i], 0, pad * sizeof(u16)); } static u16 bnxt_get_max_rss_ring(struct bnxt *bp) @@ -6117,6 +6143,8 @@ static void bnxt_fill_hw_rss_tbl_p5(struct bnxt *bp, if (vnic->flags & BNXT_VNIC_NTUPLE_FLAG) j = ethtool_rxfh_indir_default(i, bp->rx_nr_rings); + else if (vnic->flags & BNXT_VNIC_RSSCTX_FLAG) + j = vnic->rss_ctx->rss_indir_tbl[i]; else j = bp->rss_indir_tbl[i]; rxr = &bp->rx_ring[j]; @@ -6154,9 +6182,9 @@ __bnxt_hwrm_vnic_set_rss(struct bnxt *bp, struct hwrm_vnic_rss_cfg_input *req, req->hash_key_tbl_addr = cpu_to_le64(vnic->rss_hash_key_dma_addr); } -static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) +static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, struct bnxt_vnic_info *vnic, + bool set_rss) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_rss_cfg_input *req; int rc; @@ -6174,9 +6202,9 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) return hwrm_req_send(bp, req); } -static int bnxt_hwrm_vnic_set_rss_p5(struct bnxt *bp, u16 vnic_id, bool set_rss) +static int bnxt_hwrm_vnic_set_rss_p5(struct bnxt *bp, + struct bnxt_vnic_info *vnic, bool set_rss) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_rss_cfg_input *req; dma_addr_t ring_tbl_map; u32 i, nr_ctxs; @@ -6229,9 +6257,8 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp) hwrm_req_drop(bp, req); } -static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id) +static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) { - struct 
bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_plcmodes_cfg_input *req; int rc; @@ -6256,7 +6283,8 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id) return hwrm_req_send(bp, req); } -static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, u16 vnic_id, +static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, + struct bnxt_vnic_info *vnic, u16 ctx_idx) { struct hwrm_vnic_rss_cos_lb_ctx_free_input *req; @@ -6265,10 +6293,10 @@ static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, u16 vnic_id, return; req->rss_cos_lb_ctx_id = - cpu_to_le16(bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx]); + cpu_to_le16(vnic->fw_rss_cos_lb_ctx[ctx_idx]); hwrm_req_send(bp, req); - bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID; + vnic->fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID; } static void bnxt_hwrm_vnic_ctx_free(struct bnxt *bp) @@ -6280,13 +6308,14 @@ static void bnxt_hwrm_vnic_ctx_free(struct bnxt *bp) for (j = 0; j < BNXT_MAX_CTX_PER_VNIC; j++) { if (vnic->fw_rss_cos_lb_ctx[j] != INVALID_HW_RING_ID) - bnxt_hwrm_vnic_ctx_free_one(bp, i, j); + bnxt_hwrm_vnic_ctx_free_one(bp, vnic, j); } } bp->rsscos_nr_ctxs = 0; } -static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id, u16 ctx_idx) +static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, + struct bnxt_vnic_info *vnic, u16 ctx_idx) { struct hwrm_vnic_rss_cos_lb_ctx_alloc_output *resp; struct hwrm_vnic_rss_cos_lb_ctx_alloc_input *req; @@ -6299,7 +6328,7 @@ static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id, u16 ctx_idx) resp = hwrm_req_hold(bp, req); rc = hwrm_req_send(bp, req); if (!rc) - bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] = + vnic->fw_rss_cos_lb_ctx[ctx_idx] = le16_to_cpu(resp->rss_cos_lb_ctx_id); hwrm_req_drop(bp, req); @@ -6313,10 +6342,9 @@ static u32 bnxt_get_roce_vnic_mode(struct bnxt *bp) return VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE; } -int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id) +int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic) { struct bnxt_vnic_info *vnic0 = &bp->vnic_info[BNXT_VNIC_DEFAULT]; - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_cfg_input *req; unsigned int ring = 0, grp_idx; u16 def_vlan = 0; @@ -6364,8 +6392,8 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id) if (vnic->flags & BNXT_VNIC_RSS_FLAG) ring = 0; else if (vnic->flags & BNXT_VNIC_RFS_FLAG) - ring = vnic_id - 1; - else if ((vnic_id == 1) && BNXT_CHIP_TYPE_NITRO_A0(bp)) + ring = vnic->vnic_id - 1; + else if ((vnic->vnic_id == 1) && BNXT_CHIP_TYPE_NITRO_A0(bp)) ring = bp->rx_nr_rings - 1; grp_idx = bp->rx_ring[ring].bnapi->index; @@ -6381,25 +6409,25 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id) #endif if ((bp->flags & BNXT_FLAG_STRIP_VLAN) || def_vlan) req->flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE); - if (!vnic_id && bnxt_ulp_registered(bp->edev)) + if (vnic->vnic_id == BNXT_VNIC_DEFAULT && bnxt_ulp_registered(bp->edev)) req->flags |= cpu_to_le32(bnxt_get_roce_vnic_mode(bp)); return hwrm_req_send(bp, req); } -static void bnxt_hwrm_vnic_free_one(struct bnxt *bp, u16 vnic_id) +static void bnxt_hwrm_vnic_free_one(struct bnxt *bp, + struct bnxt_vnic_info *vnic) { - if (bp->vnic_info[vnic_id].fw_vnic_id != INVALID_HW_RING_ID) { + if (vnic->fw_vnic_id != INVALID_HW_RING_ID) { struct hwrm_vnic_free_input *req; if (hwrm_req_init(bp, req, HWRM_VNIC_FREE)) return; - req->vnic_id = - cpu_to_le32(bp->vnic_info[vnic_id].fw_vnic_id); + req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); hwrm_req_send(bp, 
req); - bp->vnic_info[vnic_id].fw_vnic_id = INVALID_HW_RING_ID; + vnic->fw_vnic_id = INVALID_HW_RING_ID; } } @@ -6408,15 +6436,14 @@ static void bnxt_hwrm_vnic_free(struct bnxt *bp) u16 i; for (i = 0; i < bp->nr_vnics; i++) - bnxt_hwrm_vnic_free_one(bp, i); + bnxt_hwrm_vnic_free_one(bp, &bp->vnic_info[i]); } -static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id, - unsigned int start_rx_ring_idx, - unsigned int nr_rings) +int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic, + unsigned int start_rx_ring_idx, + unsigned int nr_rings) { unsigned int i, j, grp_idx, end_idx = start_rx_ring_idx + nr_rings; - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_alloc_output *resp; struct hwrm_vnic_alloc_input *req; int rc; @@ -6442,7 +6469,7 @@ static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id, vnic_no_ring_grps: for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) vnic->fw_rss_cos_lb_ctx[i] = INVALID_HW_RING_ID; - if (vnic_id == BNXT_VNIC_DEFAULT) + if (vnic->vnic_id == BNXT_VNIC_DEFAULT) req->flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT); resp = hwrm_req_hold(bp, req); @@ -7341,7 +7368,7 @@ static void bnxt_check_rss_tbl_no_rmgr(struct bnxt *bp) if (hw_resc->resv_rx_rings != bp->rx_nr_rings) { hw_resc->resv_rx_rings = bp->rx_nr_rings; if (!netif_is_rxfh_configured(bp->dev)) - bnxt_set_dflt_rss_indir_tbl(bp); + bnxt_set_dflt_rss_indir_tbl(bp, NULL); } } @@ -7349,7 +7376,7 @@ static int bnxt_get_total_vnics(struct bnxt *bp, int rx_rings) { if (bp->flags & BNXT_FLAG_RFS) { if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) - return 2; + return 2 + bp->num_rss_ctx; if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return rx_rings + 1; } @@ -7497,7 +7524,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) return -ENOMEM; if (!netif_is_rxfh_configured(bp->dev)) - bnxt_set_dflt_rss_indir_tbl(bp); + bnxt_set_dflt_rss_indir_tbl(bp, NULL); return rc; } @@ -9676,7 +9703,7 @@ static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa) else if (BNXT_NO_FW_ACCESS(bp)) return 0; for (i = 0; i < bp->nr_vnics; i++) { - rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags); + rc = bnxt_hwrm_vnic_set_tpa(bp, &bp->vnic_info[i], tpa_flags); if (rc) { netdev_err(bp->dev, "hwrm vnic set tpa failure rc for vnic %d: %x\n", i, rc); @@ -9691,7 +9718,7 @@ static void bnxt_hwrm_clear_vnic_rss(struct bnxt *bp) int i; for (i = 0; i < bp->nr_vnics; i++) - bnxt_hwrm_vnic_set_rss(bp, i, false); + bnxt_hwrm_vnic_set_rss(bp, &bp->vnic_info[i], false); } static void bnxt_clear_vnic(struct bnxt *bp) @@ -9769,28 +9796,27 @@ static int bnxt_hwrm_set_cache_line_size(struct bnxt *bp, int size) return hwrm_req_send(bp, req); } -static int __bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) +static int __bnxt_setup_vnic(struct bnxt *bp, struct bnxt_vnic_info *vnic) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; int rc; if (vnic->flags & BNXT_VNIC_RFS_NEW_RSS_FLAG) goto skip_rss_ctx; /* allocate context for vnic */ - rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, 0); + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, 0); if (rc) { netdev_err(bp->dev, "hwrm vnic %d alloc failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; } bp->rsscos_nr_ctxs++; if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { - rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, 1); + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, 1); if (rc) { netdev_err(bp->dev, "hwrm vnic %d cos ctx alloc failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; } bp->rsscos_nr_ctxs++; @@ -9798,26 +9824,26 @@ static int __bnxt_setup_vnic(struct bnxt *bp, u16 
vnic_id) skip_rss_ctx: /* configure default vnic, ring grp */ - rc = bnxt_hwrm_vnic_cfg(bp, vnic_id); + rc = bnxt_hwrm_vnic_cfg(bp, vnic); if (rc) { netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; } /* Enable RSS hashing on vnic */ - rc = bnxt_hwrm_vnic_set_rss(bp, vnic_id, true); + rc = bnxt_hwrm_vnic_set_rss(bp, vnic, true); if (rc) { netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; } if (bp->flags & BNXT_FLAG_AGG_RINGS) { - rc = bnxt_hwrm_vnic_set_hds(bp, vnic_id); + rc = bnxt_hwrm_vnic_set_hds(bp, vnic); if (rc) { netdev_err(bp->dev, "hwrm vnic %d set hds failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); } } @@ -9825,16 +9851,33 @@ static int __bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) return rc; } -static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id) +int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) +{ + int rc; + + rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); + if (rc) { + netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", + vnic->vnic_id, rc); + return rc; + } + rc = bnxt_hwrm_vnic_cfg(bp, vnic); + if (rc) + netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n", + vnic->vnic_id, rc); + return rc; +} + +int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) { int rc, i, nr_ctxs; nr_ctxs = bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings); for (i = 0; i < nr_ctxs; i++) { - rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, i); + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, i); if (rc) { netdev_err(bp->dev, "hwrm vnic %d ctx %d alloc failure rc: %x\n", - vnic_id, i, rc); + vnic->vnic_id, i, rc); break; } bp->rsscos_nr_ctxs++; @@ -9842,63 +9885,57 @@ static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id) if (i < nr_ctxs) return -ENOMEM; - rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic_id, true); - if (rc) { - netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", - vnic_id, rc); - return rc; - } - rc = bnxt_hwrm_vnic_cfg(bp, vnic_id); - if (rc) { - netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n", - vnic_id, rc); + rc = bnxt_hwrm_vnic_rss_cfg_p5(bp, vnic); + if (rc) return rc; - } + if (bp->flags & BNXT_FLAG_AGG_RINGS) { - rc = bnxt_hwrm_vnic_set_hds(bp, vnic_id); + rc = bnxt_hwrm_vnic_set_hds(bp, vnic); if (rc) { netdev_err(bp->dev, "hwrm vnic %d set hds failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); } } return rc; } -static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) +static int bnxt_setup_vnic(struct bnxt *bp, struct bnxt_vnic_info *vnic) { if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) - return __bnxt_setup_vnic_p5(bp, vnic_id); + return __bnxt_setup_vnic_p5(bp, vnic); else - return __bnxt_setup_vnic(bp, vnic_id); + return __bnxt_setup_vnic(bp, vnic); } -static int bnxt_alloc_and_setup_vnic(struct bnxt *bp, u16 vnic_id, +static int bnxt_alloc_and_setup_vnic(struct bnxt *bp, + struct bnxt_vnic_info *vnic, u16 start_rx_ring_idx, int rx_rings) { int rc; - rc = bnxt_hwrm_vnic_alloc(bp, vnic_id, start_rx_ring_idx, rx_rings); + rc = bnxt_hwrm_vnic_alloc(bp, vnic, start_rx_ring_idx, rx_rings); if (rc) { netdev_err(bp->dev, "hwrm vnic %d alloc failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); return rc; } - return bnxt_setup_vnic(bp, vnic_id); + return bnxt_setup_vnic(bp, vnic); } static int bnxt_alloc_rfs_vnics(struct bnxt *bp) { + struct bnxt_vnic_info *vnic; int i, rc = 0; - if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) - return bnxt_alloc_and_setup_vnic(bp, BNXT_VNIC_NTUPLE, 
0, - bp->rx_nr_rings); + if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) { + vnic = &bp->vnic_info[BNXT_VNIC_NTUPLE]; + return bnxt_alloc_and_setup_vnic(bp, vnic, 0, bp->rx_nr_rings); + } if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return 0; for (i = 0; i < bp->rx_nr_rings; i++) { - struct bnxt_vnic_info *vnic; u16 vnic_id = i + 1; u16 ring_id = i; @@ -9909,12 +9946,104 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) vnic->flags |= BNXT_VNIC_RFS_FLAG; if (bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP) vnic->flags |= BNXT_VNIC_RFS_NEW_RSS_FLAG; - if (bnxt_alloc_and_setup_vnic(bp, vnic_id, ring_id, 1)) + if (bnxt_alloc_and_setup_vnic(bp, &bp->vnic_info[vnic_id], ring_id, 1)) break; } return rc; } +void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, + bool all) +{ + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + struct bnxt_filter_base *usr_fltr, *tmp; + struct bnxt_ntuple_filter *ntp_fltr; + int i; + + bnxt_hwrm_vnic_free_one(bp, &rss_ctx->vnic); + for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) { + if (vnic->fw_rss_cos_lb_ctx[i] != INVALID_HW_RING_ID) + bnxt_hwrm_vnic_ctx_free_one(bp, vnic, i); + } + if (!all) + return; + + list_for_each_entry_safe(usr_fltr, tmp, &bp->usr_fltr_list, list) { + if ((usr_fltr->flags & BNXT_ACT_RSS_CTX) && + usr_fltr->fw_vnic_id == rss_ctx->index) { + ntp_fltr = container_of(usr_fltr, + struct bnxt_ntuple_filter, + base); + bnxt_hwrm_cfa_ntuple_filter_free(bp, ntp_fltr); + bnxt_del_ntp_filter(bp, ntp_fltr); + bnxt_del_one_usr_fltr(bp, usr_fltr); + } + } + + if (vnic->rss_table) + dma_free_coherent(&bp->pdev->dev, vnic->rss_table_size, + vnic->rss_table, + vnic->rss_table_dma_addr); + kfree(rss_ctx->rss_indir_tbl); + list_del(&rss_ctx->list); + bp->num_rss_ctx--; + clear_bit(rss_ctx->index, bp->rss_ctx_bmap); + kfree(rss_ctx); +} + +static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) +{ + bool set_tpa = !!(bp->flags & BNXT_FLAG_TPA); + struct bnxt_rss_ctx *rss_ctx, *tmp; + + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) { + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + if (bnxt_hwrm_vnic_alloc(bp, vnic, 0, bp->rx_nr_rings) || + bnxt_hwrm_vnic_set_tpa(bp, vnic, set_tpa) || + __bnxt_setup_vnic_p5(bp, vnic)) { + netdev_err(bp->dev, "Failed to restore RSS ctx %d\n", + rss_ctx->index); + bnxt_del_one_rss_ctx(bp, rss_ctx, true); + } + } +} + +struct bnxt_rss_ctx *bnxt_alloc_rss_ctx(struct bnxt *bp) +{ + struct bnxt_rss_ctx *rss_ctx = NULL; + + rss_ctx = kzalloc(sizeof(*rss_ctx), GFP_KERNEL); + if (rss_ctx) { + rss_ctx->vnic.rss_ctx = rss_ctx; + list_add_tail(&rss_ctx->list, &bp->rss_ctx_list); + bp->num_rss_ctx++; + } + return rss_ctx; +} + +void bnxt_clear_rss_ctxs(struct bnxt *bp, bool all) +{ + struct bnxt_rss_ctx *rss_ctx, *tmp; + + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) + bnxt_del_one_rss_ctx(bp, rss_ctx, all); + + if (all) + bitmap_free(bp->rss_ctx_bmap); +} + +static void bnxt_init_multi_rss_ctx(struct bnxt *bp) +{ + bp->rss_ctx_bmap = bitmap_zalloc(BNXT_RSS_CTX_BMAP_LEN, GFP_KERNEL); + if (bp->rss_ctx_bmap) { + /* burn index 0 since we cannot have context 0 */ + __set_bit(0, bp->rss_ctx_bmap); + INIT_LIST_HEAD(&bp->rss_ctx_list); + bp->rss_cap |= BNXT_RSS_CAP_MULTI_RSS_CTX; + } +} + /* Allow PF, trusted VFs and VFs with default VLAN to be in promiscuous mode */ static bool bnxt_promisc_ok(struct bnxt *bp) { @@ -9927,16 +10056,17 @@ static bool bnxt_promisc_ok(struct bnxt *bp) static int bnxt_setup_nitroa0_vnic(struct bnxt *bp) { + struct bnxt_vnic_info *vnic = &bp->vnic_info[1]; unsigned int rc = 0; - rc = 
bnxt_hwrm_vnic_alloc(bp, 1, bp->rx_nr_rings - 1, 1); + rc = bnxt_hwrm_vnic_alloc(bp, vnic, bp->rx_nr_rings - 1, 1); if (rc) { netdev_err(bp->dev, "Cannot allocate special vnic for NS2 A0: %x\n", rc); return rc; } - rc = bnxt_hwrm_vnic_cfg(bp, 1); + rc = bnxt_hwrm_vnic_cfg(bp, vnic); if (rc) { netdev_err(bp->dev, "Cannot allocate special vnic for NS2 A0: %x\n", rc); @@ -9979,7 +10109,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) rx_nr_rings--; /* default vnic 0 */ - rc = bnxt_hwrm_vnic_alloc(bp, BNXT_VNIC_DEFAULT, 0, rx_nr_rings); + rc = bnxt_hwrm_vnic_alloc(bp, vnic, 0, rx_nr_rings); if (rc) { netdev_err(bp->dev, "hwrm vnic alloc failure rc: %x\n", rc); goto err_out; @@ -9988,7 +10118,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) if (BNXT_VF(bp)) bnxt_hwrm_func_qcfg(bp); - rc = bnxt_setup_vnic(bp, BNXT_VNIC_DEFAULT); + rc = bnxt_setup_vnic(bp, vnic); if (rc) goto err_out; if (bp->rss_cap & BNXT_RSS_CAP_RSS_HASH_TYPE_DELTA) @@ -11760,6 +11890,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) bnxt_vf_reps_open(bp); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_hwrm_realloc_rss_ctx_vnic(bp); bnxt_cfg_usr_fltrs(bp); return 0; @@ -11908,6 +12040,8 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, while (bnxt_drv_busy(bp)) msleep(20); + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, false); /* Flush rings and disable interrupts */ bnxt_shutdown_nic(bp, irq_re_init); @@ -12405,33 +12539,26 @@ static bool bnxt_rfs_supported(struct bnxt *bp) } /* If runtime conditions support RFS */ -static bool bnxt_rfs_capable(struct bnxt *bp) +bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx) { struct bnxt_hw_rings hwr = {0}; int max_vnics, max_rss_ctxs; - hwr.rss_ctx = 1; - if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) { - /* 2 VNICS: default + Ntuple */ - hwr.vnic = 2; - hwr.rss_ctx = bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) * - hwr.vnic; - goto check_reserve_vnic; - } - if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && + !BNXT_SUPPORTS_NTUPLE_VNIC(bp)) return bnxt_rfs_supported(bp); + if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings) return false; - hwr.vnic = 1 + bp->rx_nr_rings; -check_reserve_vnic: + hwr.grp = bp->rx_nr_rings; + hwr.vnic = bnxt_get_total_vnics(bp, bp->rx_nr_rings); + if (new_rss_ctx) + hwr.vnic++; + hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); max_vnics = bnxt_get_max_func_vnics(bp); max_rss_ctxs = bnxt_get_max_func_rss_ctxs(bp); - if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && - !(bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP)) - hwr.rss_ctx = hwr.vnic; - if (hwr.vnic > max_vnics || hwr.rss_ctx > max_rss_ctxs) { if (bp->rx_nr_rings > 1) netdev_warn(bp->dev, @@ -12465,7 +12592,7 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev, struct bnxt *bp = netdev_priv(dev); netdev_features_t vlan_features; - if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp)) + if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp, false)) features &= ~NETIF_F_NTUPLE; if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog) @@ -13601,7 +13728,7 @@ static void bnxt_set_dflt_rfs(struct bnxt *bp) bp->flags &= ~BNXT_FLAG_RFS; if (bnxt_rfs_supported(bp)) { dev->hw_features |= NETIF_F_NTUPLE; - if (bnxt_rfs_capable(bp)) { + if (bnxt_rfs_capable(bp, false)) { bp->flags |= BNXT_FLAG_RFS; dev->features |= NETIF_F_NTUPLE; } @@ -14454,12 +14581,9 @@ static int 
bnxt_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { u16 mode; - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - mode = nla_get_u16(attr); if (mode == bp->br_mode) break; @@ -14601,6 +14725,8 @@ static void bnxt_remove_one(struct pci_dev *pdev) unregister_netdev(dev); bnxt_free_l2_filters(bp, true); bnxt_free_ntp_fltrs(bp, true); + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, true); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); /* Flush any pending tasks */ cancel_work_sync(&bp->sp_task); @@ -15059,7 +15185,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bp->flags |= BNXT_FLAG_CHIP_P7; } - rc = bnxt_alloc_rss_indir_tbl(bp); + rc = bnxt_alloc_rss_indir_tbl(bp, NULL); if (rc) goto init_err_pci_clean; @@ -15212,6 +15338,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_LIST_HEAD(&bp->usr_fltr_list); + if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) + bnxt_init_multi_rss_ctx(bp); + rc = register_netdev(dev); if (rc) goto init_err_cleanup; @@ -15232,6 +15361,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_clear_int_mode(bp); init_err_pci_clean: + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, true); bnxt_hwrm_func_drv_unrgtr(bp); bnxt_free_hwrm_resources(bp); bnxt_hwmon_uninit(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index dd849e715c9ba..0640fcb57ef84 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1256,8 +1256,22 @@ struct bnxt_vnic_info { #define BNXT_VNIC_UCAST_FLAG 8 #define BNXT_VNIC_RFS_NEW_RSS_FLAG 0x10 #define BNXT_VNIC_NTUPLE_FLAG 0x20 +#define BNXT_VNIC_RSSCTX_FLAG 0x40 + struct bnxt_rss_ctx *rss_ctx; + u32 vnic_id; }; +struct bnxt_rss_ctx { + struct list_head list; + struct bnxt_vnic_info vnic; + u16 *rss_indir_tbl; + u8 index; +}; + +#define BNXT_MAX_ETH_RSS_CTX 32 +#define BNXT_RSS_CTX_BMAP_LEN (BNXT_MAX_ETH_RSS_CTX + 1) +#define BNXT_VNIC_ID_INVALID 0xffffffff + struct bnxt_hw_rings { int tx; int rx; @@ -1360,6 +1374,7 @@ struct bnxt_filter_base { #define BNXT_ACT_RING_DST 2 #define BNXT_ACT_FUNC_DST 4 #define BNXT_ACT_NO_AGING 8 +#define BNXT_ACT_RSS_CTX 0x10 u16 sw_id; u16 rxq; u16 fw_vnic_id; @@ -2227,6 +2242,9 @@ struct bnxt { /* grp_info indexed by completion ring index */ struct bnxt_ring_grp_info *grp_info; struct bnxt_vnic_info *vnic_info; + struct list_head rss_ctx_list; + unsigned long *rss_ctx_bmap; + u32 num_rss_ctx; int nr_vnics; u16 *rss_indir_tbl; u16 rss_indir_tbl_entries; @@ -2241,6 +2259,7 @@ struct bnxt { #define BNXT_RSS_CAP_AH_V6_RSS_CAP BIT(5) #define BNXT_RSS_CAP_ESP_V4_RSS_CAP BIT(6) #define BNXT_RSS_CAP_ESP_V6_RSS_CAP BIT(7) +#define BNXT_RSS_CAP_MULTI_RSS_CTX BIT(8) u8 rss_hash_key[HW_HASH_KEY_SIZE]; u8 rss_hash_key_valid:1; @@ -2340,6 +2359,10 @@ struct bnxt { #define BNXT_SUPPORTS_NTUPLE_VNIC(bp) \ (BNXT_PF(bp) && ((bp)->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V3)) +#define BNXT_SUPPORTS_MULTI_RSS_CTX(bp) \ + (BNXT_PF(bp) && BNXT_SUPPORTS_NTUPLE_VNIC(bp) && \ + ((bp)->rss_cap & BNXT_RSS_CAP_MULTI_RSS_CTX)) + u32 hwrm_spec_code; u16 hwrm_cmd_seq; u16 hwrm_cmd_kong_seq; @@ -2693,9 +2716,16 @@ int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, struct bnxt_ntuple_filter *fltr); int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, 
struct bnxt_ntuple_filter *fltr); +int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u32 tpa_flags); void bnxt_fill_ipv6_mask(__be32 mask[4]); +int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); +void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings); -int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id); +int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic); +int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic, + unsigned int start_rx_ring_idx, + unsigned int nr_rings); int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings); int bnxt_nq_rings_in_use(struct bnxt *bp); int bnxt_hwrm_set_coal(struct bnxt *); @@ -2721,6 +2751,12 @@ int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); int bnxt_hwrm_func_qcaps(struct bnxt *bp); int bnxt_hwrm_fw_set_time(struct bnxt *); +int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); +int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); +void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, + bool all); +struct bnxt_rss_ctx *bnxt_alloc_rss_ctx(struct bnxt *bp); +void bnxt_clear_rss_ctxs(struct bnxt *bp, bool all); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); @@ -2728,6 +2764,7 @@ void bnxt_reenable_sriov(struct bnxt *bp); void bnxt_close_nic(struct bnxt *, bool, bool); void bnxt_get_ring_err_stats(struct bnxt *bp, struct bnxt_total_ring_err_stats *stats); +bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx); int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words, u32 *reg_buf); void bnxt_fw_exception(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 1d240a27455a5..9c49f629d5657 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -969,6 +969,8 @@ static int bnxt_set_channels(struct net_device *dev, } bnxt_clear_usr_fltrs(bp, true); + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, false); if (netif_running(dev)) { if (BNXT_PF(bp)) { /* TODO CHIMP_FW: Send message to all VF's @@ -1205,6 +1207,36 @@ static int bnxt_grxclsrule(struct bnxt *bp, struct ethtool_rxnfc *cmd) return rc; } +static struct bnxt_rss_ctx *bnxt_get_rss_ctx_from_index(struct bnxt *bp, + u32 index) +{ + struct bnxt_rss_ctx *rss_ctx, *tmp; + + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) + if (rss_ctx->index == index) + return rss_ctx; + return NULL; +} + +static int bnxt_alloc_rss_ctx_rss_table(struct bnxt *bp, + struct bnxt_rss_ctx *rss_ctx) +{ + int size = L1_CACHE_ALIGN(BNXT_MAX_RSS_TABLE_SIZE_P5); + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + vnic->rss_table_size = size + HW_HASH_KEY_SIZE; + vnic->rss_table = dma_alloc_coherent(&bp->pdev->dev, + vnic->rss_table_size, + &vnic->rss_table_dma_addr, + GFP_KERNEL); + if (!vnic->rss_table) + return -ENOMEM; + + vnic->rss_hash_key = ((void *)vnic->rss_table) + size; + vnic->rss_hash_key_dma_addr = vnic->rss_table_dma_addr + size; + return 0; +} + static int bnxt_add_l2_cls_rule(struct bnxt *bp, struct ethtool_rx_flow_spec *fs) { @@ -1280,22 +1312,24 @@ static bool bnxt_verify_ntuple_ip6_flow(struct ethtool_usrip6_spec *ip_spec, } static int 
bnxt_add_ntuple_cls_rule(struct bnxt *bp, - struct ethtool_rx_flow_spec *fs) + struct ethtool_rxnfc *cmd) { - u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); - u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie); + struct ethtool_rx_flow_spec *fs = &cmd->fs; struct bnxt_ntuple_filter *new_fltr, *fltr; + u32 flow_type = fs->flow_type & 0xff; struct bnxt_l2_filter *l2_fltr; struct bnxt_flow_masks *fmasks; - u32 flow_type = fs->flow_type; struct flow_keys *fkeys; - u32 idx; + u32 idx, ring; int rc; + u8 vf; if (!bp->vnic_info) return -EAGAIN; - if ((flow_type & (FLOW_MAC_EXT | FLOW_EXT)) || vf) + vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); + ring = ethtool_get_flow_spec_ring(fs->ring_cookie); + if ((fs->flow_type & (FLOW_MAC_EXT | FLOW_EXT)) || vf) return -EOPNOTSUPP; if (flow_type == IP_USER_FLOW) { @@ -1403,6 +1437,19 @@ static int bnxt_add_ntuple_cls_rule(struct bnxt *bp, rcu_read_unlock(); new_fltr->base.flags = BNXT_ACT_NO_AGING; + if (fs->flow_type & FLOW_RSS) { + struct bnxt_rss_ctx *rss_ctx; + + new_fltr->base.fw_vnic_id = 0; + new_fltr->base.flags |= BNXT_ACT_RSS_CTX; + rss_ctx = bnxt_get_rss_ctx_from_index(bp, cmd->rss_context); + if (rss_ctx) { + new_fltr->base.fw_vnic_id = rss_ctx->index; + } else { + rc = -EINVAL; + goto ntuple_err; + } + } if (fs->ring_cookie == RX_CLS_FLOW_DISC) new_fltr->base.flags |= BNXT_ACT_DROP; else @@ -1444,12 +1491,12 @@ static int bnxt_srxclsrlins(struct bnxt *bp, struct ethtool_rxnfc *cmd) flow_type == IPV6_USER_FLOW) && !(bp->fw_cap & BNXT_FW_CAP_CFA_NTUPLE_RX_EXT_IP_PROTO)) return -EOPNOTSUPP; - if (flow_type & (FLOW_MAC_EXT | FLOW_RSS)) + if (flow_type & FLOW_MAC_EXT) return -EINVAL; flow_type &= ~FLOW_EXT; if (fs->ring_cookie == RX_CLS_FLOW_DISC && flow_type != ETHER_FLOW) - return bnxt_add_ntuple_cls_rule(bp, fs); + return bnxt_add_ntuple_cls_rule(bp, cmd); ring = ethtool_get_flow_spec_ring(fs->ring_cookie); vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); @@ -1463,7 +1510,7 @@ static int bnxt_srxclsrlins(struct bnxt *bp, struct ethtool_rxnfc *cmd) if (flow_type == ETHER_FLOW) rc = bnxt_add_l2_cls_rule(bp, fs); else - rc = bnxt_add_ntuple_cls_rule(bp, fs); + rc = bnxt_add_ntuple_cls_rule(bp, cmd); return rc; } @@ -1754,7 +1801,10 @@ static u32 bnxt_get_rxfh_key_size(struct net_device *dev) static int bnxt_get_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh) { + u32 rss_context = rxfh->rss_context; + struct bnxt_rss_ctx *rss_ctx = NULL; struct bnxt *bp = netdev_priv(dev); + u16 *indir_tbl = bp->rss_indir_tbl; struct bnxt_vnic_info *vnic; u32 i, tbl_size; @@ -1764,10 +1814,18 @@ static int bnxt_get_rxfh(struct net_device *dev, return 0; vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT]; - if (rxfh->indir && bp->rss_indir_tbl) { + if (rxfh->rss_context) { + rss_ctx = bnxt_get_rss_ctx_from_index(bp, rss_context); + if (!rss_ctx) + return -EINVAL; + indir_tbl = rss_ctx->rss_indir_tbl; + vnic = &rss_ctx->vnic; + } + + if (rxfh->indir && indir_tbl) { tbl_size = bnxt_get_rxfh_indir_size(dev); for (i = 0; i < tbl_size; i++) - rxfh->indir[i] = bp->rss_indir_tbl[i]; + rxfh->indir[i] = indir_tbl[i]; } if (rxfh->key && vnic->rss_hash_key) @@ -1776,6 +1834,131 @@ static int bnxt_get_rxfh(struct net_device *dev, return 0; } +static void bnxt_modify_rss(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, + struct ethtool_rxfh_param *rxfh) +{ + if (rxfh->key) { + if (rss_ctx) { + memcpy(rss_ctx->vnic.rss_hash_key, rxfh->key, + HW_HASH_KEY_SIZE); + } else { + memcpy(bp->rss_hash_key, rxfh->key, HW_HASH_KEY_SIZE); + bp->rss_hash_key_updated = 
true; + } + } + if (rxfh->indir) { + u32 i, pad, tbl_size = bnxt_get_rxfh_indir_size(bp->dev); + u16 *indir_tbl = bp->rss_indir_tbl; + + if (rss_ctx) + indir_tbl = rss_ctx->rss_indir_tbl; + for (i = 0; i < tbl_size; i++) + indir_tbl[i] = rxfh->indir[i]; + pad = bp->rss_indir_tbl_entries - tbl_size; + if (pad) + memset(&indir_tbl[i], 0, pad * sizeof(u16)); + } +} + +static int bnxt_set_rxfh_context(struct bnxt *bp, + struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + u32 *rss_context = &rxfh->rss_context; + struct bnxt_rss_ctx *rss_ctx; + struct bnxt_vnic_info *vnic; + bool modify = false; + int bit_id; + int rc; + + if (!BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) { + NL_SET_ERR_MSG_MOD(extack, "RSS contexts not supported"); + return -EOPNOTSUPP; + } + + if (*rss_context != ETH_RXFH_CONTEXT_ALLOC) { + rss_ctx = bnxt_get_rss_ctx_from_index(bp, *rss_context); + if (!rss_ctx) { + NL_SET_ERR_MSG_FMT_MOD(extack, "RSS context %u not found", + *rss_context); + return -EINVAL; + } + if (*rss_context && rxfh->rss_delete) { + bnxt_del_one_rss_ctx(bp, rss_ctx, true); + return 0; + } + modify = true; + vnic = &rss_ctx->vnic; + goto modify_context; + } + + if (bp->num_rss_ctx >= BNXT_MAX_ETH_RSS_CTX) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Out of RSS contexts, maximum %u", + BNXT_MAX_ETH_RSS_CTX); + return -EINVAL; + } + + if (!bnxt_rfs_capable(bp, true)) { + NL_SET_ERR_MSG_MOD(extack, "Out of hardware resources"); + return -ENOMEM; + } + + rss_ctx = bnxt_alloc_rss_ctx(bp); + if (!rss_ctx) + return -ENOMEM; + + vnic = &rss_ctx->vnic; + vnic->flags |= BNXT_VNIC_RSSCTX_FLAG; + vnic->vnic_id = BNXT_VNIC_ID_INVALID; + rc = bnxt_alloc_rss_ctx_rss_table(bp, rss_ctx); + if (rc) + goto out; + + rc = bnxt_alloc_rss_indir_tbl(bp, rss_ctx); + if (rc) + goto out; + + bnxt_set_dflt_rss_indir_tbl(bp, rss_ctx); + memcpy(vnic->rss_hash_key, bp->rss_hash_key, HW_HASH_KEY_SIZE); + + rc = bnxt_hwrm_vnic_alloc(bp, vnic, 0, bp->rx_nr_rings); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Unable to allocate VNIC"); + goto out; + } + + rc = bnxt_hwrm_vnic_set_tpa(bp, vnic, bp->flags & BNXT_FLAG_TPA); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Unable to setup TPA"); + goto out; + } +modify_context: + bnxt_modify_rss(bp, rss_ctx, rxfh); + + if (modify) + return bnxt_hwrm_vnic_rss_cfg_p5(bp, vnic); + + rc = __bnxt_setup_vnic_p5(bp, vnic); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Unable to setup VNIC"); + goto out; + } + + bit_id = bitmap_find_free_region(bp->rss_ctx_bmap, + BNXT_RSS_CTX_BMAP_LEN, 0); + if (bit_id < 0) { + rc = -ENOMEM; + goto out; + } + rss_ctx->index = (u16)bit_id; + *rss_context = rss_ctx->index; + + return 0; +out: + bnxt_del_one_rss_ctx(bp, rss_ctx, true); + return rc; +} + static int bnxt_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) @@ -1786,20 +1969,11 @@ static int bnxt_set_rxfh(struct net_device *dev, if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (rxfh->key) { - memcpy(bp->rss_hash_key, rxfh->key, HW_HASH_KEY_SIZE); - bp->rss_hash_key_updated = true; - } + if (rxfh->rss_context) + return bnxt_set_rxfh_context(bp, rxfh, extack); - if (rxfh->indir) { - u32 i, pad, tbl_size = bnxt_get_rxfh_indir_size(dev); + bnxt_modify_rss(bp, NULL, rxfh); - for (i = 0; i < tbl_size; i++) - bp->rss_indir_tbl[i] = rxfh->indir[i]; - pad = bp->rss_indir_tbl_entries - tbl_size; - if (pad) - memset(&bp->rss_indir_tbl[i], 0, pad * sizeof(u16)); - } bnxt_clear_usr_fltrs(bp, false); if (netif_running(bp->dev)) { bnxt_close_nic(bp, false, false); @@ -5071,6 +5245,7 @@ void bnxt_ethtool_free(struct bnxt *bp) const struct ethtool_ops bnxt_ethtool_ops = { .cap_link_lanes_supported = 1, + .cap_rss_ctx_supported = 1, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USECS_IRQ |
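(With `.cap_rss_ctx_supported` set above, the new contexts are driven through the regular ethtool interface. A hypothetical usage sketch — device name, ring count and port are invented, and the context id is assigned by the driver:)

```shell
# create an additional RSS context spreading over 4 rings
ethtool -X eth0 context new equal 4

# steer a TCP flow into that context with an ntuple rule (FLOW_RSS)
ethtool -N eth0 flow-type tcp4 dst-port 4242 context 1

# show, then delete, the context
ethtool -x eth0 context 1
ethtool -X eth0 context 1 delete
```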
false); @@ -5071,6 +5245,7 @@ void bnxt_ethtool_free(struct bnxt *bp) const struct ethtool_ops bnxt_ethtool_ops = { .cap_link_lanes_supported = 1, + .cap_rss_ctx_supported = 1, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USECS_IRQ | diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index cc07660330f53..345aac4484ee4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -109,7 +109,8 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp) spin_unlock_bh(&ptp->ptp_lock); } -static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts) +static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts, + u32 txts_tmo) { struct hwrm_port_ts_query_output *resp; struct hwrm_port_ts_query_input *req; @@ -122,10 +123,15 @@ static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts) req->flags = cpu_to_le32(flags); if ((flags & PORT_TS_QUERY_REQ_FLAGS_PATH) == PORT_TS_QUERY_REQ_FLAGS_PATH_TX) { + u32 tmo_us = txts_tmo * 1000; + req->enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES); req->ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid); req->ptp_hdr_offset = cpu_to_le16(bp->ptp_cfg->tx_hdr_off); - req->ts_req_timeout = cpu_to_le16(BNXT_PTP_QTS_TIMEOUT); + if (!tmo_us) + tmo_us = BNXT_PTP_QTS_TIMEOUT; + tmo_us = min(tmo_us, BNXT_PTP_QTS_MAX_TMO_US); + req->ts_req_timeout = cpu_to_le16(tmo_us); } resp = hwrm_req_hold(bp, req); @@ -672,10 +678,17 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; struct skb_shared_hwtstamps timestamp; + unsigned long now = jiffies; u64 ts = 0, ns = 0; + u32 tmo = 0; int rc; - rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX, &ts); + if (!ptp->txts_pending) + ptp->abs_txts_tmo = now + msecs_to_jiffies(ptp->txts_tmo); + if (!time_after_eq(now, ptp->abs_txts_tmo)) + tmo = jiffies_to_msecs(ptp->abs_txts_tmo - now); + rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX, &ts, + tmo); if (!rc) { memset(&timestamp, 0, sizeof(timestamp)); spin_lock_bh(&ptp->ptp_lock); @@ -684,6 +697,10 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) timestamp.hwtstamp = ns_to_ktime(ns); skb_tstamp_tx(ptp->tx_skb, &timestamp); } else { + if (!time_after_eq(jiffies, ptp->abs_txts_tmo)) { + ptp->txts_pending = true; + return; + } netdev_warn_once(bp->dev, "TS query for TX timer failed rc = %x\n", rc); } @@ -691,6 +708,7 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) dev_kfree_skb_any(ptp->tx_skb); ptp->tx_skb = NULL; atomic_inc(&ptp->tx_avail); + ptp->txts_pending = false; } static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) @@ -714,6 +732,8 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) spin_unlock_bh(&ptp->ptp_lock); ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD; } + if (ptp->txts_pending) + return 0; return HZ; } @@ -891,7 +911,8 @@ int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg) if (rc) return rc; } else { - rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, &ns); + rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, + &ns, 0); if (rc) return rc; } @@ -965,6 +986,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) spin_unlock_bh(&ptp->ptp_lock); ptp_schedule_worker(ptp->ptp_clock, 0); } + ptp->txts_tmo = BNXT_PTP_DFLT_TX_TMO; return 0; out: diff --git 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index fce8dc39a7d0e..6a2bba3f9e2dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -22,7 +22,9 @@ #define BNXT_LO_TIMER_MASK 0x0000ffffffffUL #define BNXT_HI_TIMER_MASK 0xffff00000000UL +#define BNXT_PTP_DFLT_TX_TMO 1000 /* ms */ #define BNXT_PTP_QTS_TIMEOUT 1000 +#define BNXT_PTP_QTS_MAX_TMO_US 65535 #define BNXT_PTP_QTS_TX_ENABLES (PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID | \ PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT | \ PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET) @@ -115,11 +117,14 @@ struct bnxt_ptp_cfg { BNXT_PTP_MSG_PDELAY_REQ | \ BNXT_PTP_MSG_PDELAY_RESP) u8 tx_tstamp_en:1; + u8 txts_pending:1; int rx_filter; u32 tstamp_filters; u32 refclk_regs[2]; u32 refclk_mapped_regs[2]; + u32 txts_tmo; + unsigned long abs_txts_tmo; }; #if BITS_PER_LONG == 32 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 93f9bd55020f2..86dcd2c76587b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -71,7 +71,7 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, rcu_assign_pointer(ulp->ulp_ops, ulp_ops); if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_hwrm_vnic_cfg(bp, 0); + bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]); bnxt_fill_msix_vecs(bp, bp->edev->msix_entries); edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index ad862ed7888ac..a8596ebcdfd60 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4982,10 +4982,7 @@ static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { mode = nla_get_u16(attr); if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 888509cf1f210..40e8818295951 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2896,11 +2896,14 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n, static int update_xps(struct dpaa2_eth_priv *priv) { struct net_device *net_dev = priv->net_dev; - struct cpumask xps_mask; - struct dpaa2_eth_fq *fq; int i, num_queues, netdev_queues; + struct dpaa2_eth_fq *fq; + cpumask_var_t xps_mask; int err = 0; + if (!alloc_cpumask_var(&xps_mask, GFP_KERNEL)) + return -ENOMEM; + num_queues = dpaa2_eth_queue_count(priv); netdev_queues = (net_dev->num_tc ? 
: 1) * num_queues; @@ -2910,16 +2913,17 @@ static int update_xps(struct dpaa2_eth_priv *priv) for (i = 0; i < netdev_queues; i++) { fq = &priv->fq[i % num_queues]; - cpumask_clear(&xps_mask); - cpumask_set_cpu(fq->target_cpu, &xps_mask); + cpumask_clear(xps_mask); + cpumask_set_cpu(fq->target_cpu, xps_mask); - err = netif_set_xps_queue(net_dev, &xps_mask, i); + err = netif_set_xps_queue(net_dev, xps_mask, i); if (err) { netdev_warn_once(net_dev, "Error setting XPS queue\n"); break; } } + free_cpumask_var(xps_mask); return err; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 9f07f4947b631..5c45f42232d32 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2769,7 +2769,7 @@ static int enetc_setup_xdp_prog(struct net_device *ndev, struct bpf_prog *prog, if (priv->min_num_stack_tx_queues + num_xdp_tx_queues > priv->num_tx_rings) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Reserving %d XDP TXQs does not leave a minimum of %d for stack (total %d)", + "Reserving %d XDP TXQs leaves under %d for stack (total %d)", num_xdp_tx_queues, priv->min_num_stack_tx_queues, priv->num_tx_rings); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d7693fdf640d5..8bd213da8fb6f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2454,8 +2454,6 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - phy_dev->mac_managed_pm = true; - phy_attached_info(phy_dev); return 0; @@ -2467,10 +2465,12 @@ static int fec_enet_mii_init(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); bool suppress_preamble = false; + struct phy_device *phydev; struct device_node *node; int err = -ENXIO; u32 mii_speed, holdtime; u32 bus_freq; + int addr; /* * The i.MX28 dual fec interfaces are not equal. 
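The dpaa2 update_xps() change above swaps an on-stack struct cpumask for a cpumask_var_t, since with CONFIG_CPUMASK_OFFSTACK=y the mask can be too large for the kernel stack. A minimal sketch of that alloc/use/free pattern follows; the helper name is illustrative and not part of the patch:

#include <linux/cpumask.h>
#include <linux/netdevice.h>

/* Hypothetical helper showing the cpumask_var_t pattern from the dpaa2 hunk. */
static int example_pin_queue_to_cpu(struct net_device *dev, u16 queue, int cpu)
{
	cpumask_var_t mask;
	int err;

	/* Heap-allocates only when CONFIG_CPUMASK_OFFSTACK=y; otherwise the
	 * mask lives on the stack and allocation cannot fail.
	 */
	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(mask);
	cpumask_set_cpu(cpu, mask);
	err = netif_set_xps_queue(dev, mask, queue);

	free_cpumask_var(mask);	/* always pair with alloc_cpumask_var() */
	return err;
}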
@@ -2584,6 +2584,13 @@ static int fec_enet_mii_init(struct platform_device *pdev) goto err_out_free_mdiobus; of_node_put(node); + /* find all the PHY devices on the bus and set mac_managed_pm to true */ + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + phydev = mdiobus_get_phy(fep->mii_bus, addr); + if (phydev) + phydev->mac_managed_pm = true; + } + mii_cnt++; /* save fec0 mii_bus */ diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 9aebfb843d9d1..dbe05402d40b1 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -73,7 +73,7 @@ static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { "adminq_create_tx_queue_cnt", "adminq_create_rx_queue_cnt", "adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt", "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt", - "adminq_report_stats_cnt", "adminq_report_link_speed_cnt" + "adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt" }; static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { @@ -428,6 +428,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = priv->adminq_set_driver_parameter_cnt; data[i++] = priv->adminq_report_stats_cnt; data[i++] = priv->adminq_report_link_speed_cnt; + data[i++] = priv->adminq_get_ptype_map_cnt; } static void gve_get_channels(struct net_device *netdev, diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 3fcb8daaa2437..9b068d40778db 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -3037,7 +3037,7 @@ static int __e100_power_off(struct pci_dev *pdev, bool wake) return 0; } -static int __maybe_unused e100_suspend(struct device *dev_d) +static int e100_suspend(struct device *dev_d) { bool wake; @@ -3046,7 +3046,7 @@ static int __maybe_unused e100_suspend(struct device *dev_d) return 0; } -static int __maybe_unused e100_resume(struct device *dev_d) +static int e100_resume(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct nic *nic = netdev_priv(netdev); @@ -3163,7 +3163,7 @@ static const struct pci_error_handlers e100_err_handler = { .resume = e100_io_resume, }; -static SIMPLE_DEV_PM_OPS(e100_pm_ops, e100_suspend, e100_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(e100_pm_ops, e100_suspend, e100_resume); static struct pci_driver e100_driver = { .name = DRV_NAME, @@ -3172,7 +3172,7 @@ static struct pci_driver e100_driver = { .remove = e100_remove, /* Power Management hooks */ - .driver.pm = &e100_pm_ops, + .driver.pm = pm_sleep_ptr(&e100_pm_ops), .shutdown = e100_shutdown, .err_handler = &e100_err_handler, diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 1d1e93686af2b..5b43f9b194fc2 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -149,8 +149,8 @@ static int e1000_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); static void e1000_restore_vlan(struct e1000_adapter *adapter); -static int __maybe_unused e1000_suspend(struct device *dev); -static int __maybe_unused e1000_resume(struct device *dev); +static int e1000_suspend(struct device *dev); +static int e1000_resume(struct device *dev); static void e1000_shutdown(struct pci_dev *pdev); #ifdef CONFIG_NET_POLL_CONTROLLER @@ -175,16 +175,14 @@ static const struct pci_error_handlers e1000_err_handler = { .resume = 
e1000_io_resume, }; -static SIMPLE_DEV_PM_OPS(e1000_pm_ops, e1000_suspend, e1000_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(e1000_pm_ops, e1000_suspend, e1000_resume); static struct pci_driver e1000_driver = { .name = e1000_driver_name, .id_table = e1000_pci_tbl, .probe = e1000_probe, .remove = e1000_remove, - .driver = { - .pm = &e1000_pm_ops, - }, + .driver.pm = pm_sleep_ptr(&e1000_pm_ops), .shutdown = e1000_shutdown, .err_handler = &e1000_err_handler }; @@ -5135,7 +5133,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) return 0; } -static int __maybe_unused e1000_suspend(struct device *dev) +static int e1000_suspend(struct device *dev) { int retval; struct pci_dev *pdev = to_pci_dev(dev); @@ -5147,7 +5145,7 @@ static int __maybe_unused e1000_suspend(struct device *dev) return retval; } -static int __maybe_unused e1000_resume(struct device *dev) +static int e1000_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 1fef6bb5a5fbc..4b6e7536170ab 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -628,6 +628,7 @@ struct e1000_phy_info { u32 id; u32 reset_delay_us; /* in usec */ u32 revision; + u32 retry_count; enum e1000_media_type media_type; @@ -644,6 +645,7 @@ struct e1000_phy_info { bool polarity_correction; bool speed_downgraded; bool autoneg_wait_to_complete; + bool retry_enabled; }; struct e1000_nvm_info { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 19e450a5bd314..f9e94be36e97f 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -222,11 +222,18 @@ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) if (hw->mac.type >= e1000_pch_lpt) { /* Only unforce SMBus if ME is not active */ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + /* Unforce SMBus mode in PHY */ e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg); phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg); + e1000e_enable_phy_retry(hw); + /* Unforce SMBus mode in MAC */ mac_reg = er32(CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; @@ -310,6 +317,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) goto out; } + /* There is no guarantee that the PHY is accessible at this time + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + /* The MAC-PHY interconnect may be in SMBus mode. If the PHY is * inaccessible and resetting the PHY is not blocked, toggle the * LANPHYPC Value bit to force the interconnect to PCIe mode. 
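The ich8lan.c hunks around this point bracket expected-to-fail PHY accesses with the retry toggles this series adds in phy.c further below. A condensed sketch of the pattern, mirroring the unforce-SMBus sequence in the hunk above; this helper itself is not part of the patch:

/* Hypothetical condensation of the ich8lan.c bracketing pattern: suppress
 * MDIC retries around a PHY access that is expected to report MDI errors
 * while the MAC-PHY interconnect is being switched.
 */
static void example_unforce_smbus(struct e1000_hw *hw)
{
	u16 phy_reg;

	e1000e_disable_phy_retry(hw);	/* MDI errors expected, don't retry */

	e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
	phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
	e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);

	e1000e_enable_phy_retry(hw);	/* restore retries for normal accesses */
}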
@@ -380,6 +392,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) break; } + e1000e_enable_phy_retry(hw); + hw->phy.ops.release(hw); if (!ret_val) { @@ -449,6 +463,11 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) phy->id = e1000_phy_unknown; + if (hw->mac.type == e1000_pch_mtp) { + phy->retry_count = 2; + e1000e_enable_phy_retry(hw); + } + ret_val = e1000_init_phy_workarounds_pchlan(hw); if (ret_val) return ret_val; @@ -1146,18 +1165,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) if (ret_val) goto out; - /* Force SMBus mode in PHY */ - ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); - if (ret_val) - goto release; - phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; - e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); - - /* Force SMBus mode in MAC */ - mac_reg = er32(CTRL_EXT); - mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_reg); - /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable * LPLU and disable Gig speed when entering ULP */ @@ -1313,6 +1320,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) /* Toggle LANPHYPC Value bit */ e1000_toggle_lanphypc_pch_lpt(hw); + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + /* Unforce SMBus mode in PHY */ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); if (ret_val) { @@ -1333,6 +1345,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); + e1000e_enable_phy_retry(hw); + /* Unforce SMBus mode in MAC */ mac_reg = er32(CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index cc8c531ec3dff..bef65ee4c549a 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6623,6 +6623,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) struct e1000_hw *hw = &adapter->hw; u32 ctrl, ctrl_ext, rctl, status, wufc; int retval = 0; + u16 smb_ctrl; /* Runtime suspend should only enable wakeup for link changes */ if (runtime) @@ -6696,6 +6697,23 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) if (retval) return retval; } + + /* Force SMBUS to allow WOL */ + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + + e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl); + smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl); + + e1000e_enable_phy_retry(hw); + + /* Force SMBus mode in MAC */ + ctrl_ext = er32(CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, ctrl_ext); } /* Ensure that the appropriate bits are set in LPI_CTRL @@ -6950,13 +6968,13 @@ static int __e1000_resume(struct pci_dev *pdev) return 0; } -static __maybe_unused int e1000e_pm_prepare(struct device *dev) +static int e1000e_pm_prepare(struct device *dev) { return pm_runtime_suspended(dev) && pm_suspend_via_firmware(); } -static __maybe_unused int e1000e_pm_suspend(struct device *dev) +static int e1000e_pm_suspend(struct device *dev) { struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); @@ -6979,7 +6997,7 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) return rc; } -static __maybe_unused int 
e1000e_pm_resume(struct device *dev) +static int e1000e_pm_resume(struct device *dev) { struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); @@ -7013,7 +7031,7 @@ static __maybe_unused int e1000e_pm_runtime_idle(struct device *dev) return -EBUSY; } -static __maybe_unused int e1000e_pm_runtime_resume(struct device *dev) +static int e1000e_pm_runtime_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -7032,7 +7050,7 @@ static __maybe_unused int e1000e_pm_runtime_resume(struct device *dev) return rc; } -static __maybe_unused int e1000e_pm_runtime_suspend(struct device *dev) +static int e1000e_pm_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -7919,8 +7937,7 @@ static const struct pci_device_id e1000_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); -static const struct dev_pm_ops e1000_pm_ops = { -#ifdef CONFIG_PM_SLEEP +static const struct dev_pm_ops e1000e_pm_ops = { .prepare = e1000e_pm_prepare, .suspend = e1000e_pm_suspend, .resume = e1000e_pm_resume, @@ -7928,9 +7945,8 @@ static const struct dev_pm_ops e1000_pm_ops = { .thaw = e1000e_pm_thaw, .poweroff = e1000e_pm_suspend, .restore = e1000e_pm_resume, -#endif - SET_RUNTIME_PM_OPS(e1000e_pm_runtime_suspend, e1000e_pm_runtime_resume, - e1000e_pm_runtime_idle) + RUNTIME_PM_OPS(e1000e_pm_runtime_suspend, e1000e_pm_runtime_resume, + e1000e_pm_runtime_idle) }; /* PCI Device API Driver */ @@ -7939,9 +7955,7 @@ static struct pci_driver e1000_driver = { .id_table = e1000_pci_tbl, .probe = e1000_probe, .remove = e1000_remove, - .driver = { - .pm = &e1000_pm_ops, - }, + .driver.pm = pm_ptr(&e1000e_pm_ops), .shutdown = e1000_shutdown, .err_handler = &e1000_err_handler }; diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 5e329156d1bae..93544f1cc2a51 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -107,6 +107,16 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0); } +void e1000e_disable_phy_retry(struct e1000_hw *hw) +{ + hw->phy.retry_enabled = false; +} + +void e1000e_enable_phy_retry(struct e1000_hw *hw) +{ + hw->phy.retry_enabled = true; +} + /** * e1000e_read_phy_reg_mdic - Read MDI control register * @hw: pointer to the HW structure @@ -118,55 +128,73 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) **/ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) { + u32 i, mdic = 0, retry_counter, retry_max; struct e1000_phy_info *phy = &hw->phy; - u32 i, mdic = 0; + bool success; if (offset > MAX_PHY_REG_ADDRESS) { e_dbg("PHY Address %d is out of range\n", offset); return -E1000_ERR_PARAM; } + retry_max = phy->retry_enabled ? phy->retry_count : 0; + /* Set up Op-code, Phy Address, and register offset in the MDI * Control register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. 
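	 * With retries enabled (phy->retry_enabled), the entire MDIC
	 * transaction below is re-attempted up to phy->retry_count times
	 * before the function gives up and returns -E1000_ERR_PHY.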
*/ - mdic = ((offset << E1000_MDIC_REG_SHIFT) | - (phy->addr << E1000_MDIC_PHY_SHIFT) | - (E1000_MDIC_OP_READ)); + for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) { + success = true; - ew32(MDIC, mdic); + mdic = ((offset << E1000_MDIC_REG_SHIFT) | + (phy->addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_READ)); - /* Poll the ready bit to see if the MDI read completed - * Increasing the time out as testing showed failures with - * the lower time out - */ - for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - udelay(50); - mdic = er32(MDIC); - if (mdic & E1000_MDIC_READY) - break; - } - if (!(mdic & E1000_MDIC_READY)) { - e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset); - return -E1000_ERR_PHY; - } - if (mdic & E1000_MDIC_ERROR) { - e_dbg("MDI Read PHY Reg Address %d Error\n", offset); - return -E1000_ERR_PHY; - } - if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { - e_dbg("MDI Read offset error - requested %d, returned %d\n", - offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); - return -E1000_ERR_PHY; + ew32(MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { + usleep_range(50, 60); + mdic = er32(MDIC); + if (mdic & E1000_MDIC_READY) + break; + } + if (!(mdic & E1000_MDIC_READY)) { + e_dbg("MDI Read PHY Reg Address %d did not complete\n", + offset); + success = false; + } + if (mdic & E1000_MDIC_ERROR) { + e_dbg("MDI Read PHY Reg Address %d Error\n", offset); + success = false; + } + if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { + e_dbg("MDI Read offset error - requested %d, returned %d\n", + offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); + success = false; + } + + /* Allow some time after each MDIC transaction to avoid + * reading duplicate data in the next MDIC transaction. + */ + if (hw->mac.type == e1000_pch2lan) + usleep_range(100, 150); + + if (success) { + *data = (u16)mdic; + return 0; + } + + if (retry_counter != retry_max) { + e_dbg("Perform retry on PHY transaction...\n"); + mdelay(10); + } } - *data = (u16)mdic; - /* Allow some time after each MDIC transaction to avoid - * reading duplicate data in the next MDIC transaction. - */ - if (hw->mac.type == e1000_pch2lan) - udelay(100); - return 0; + return -E1000_ERR_PHY; } /** @@ -179,56 +207,72 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) **/ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) { + u32 i, mdic = 0, retry_counter, retry_max; struct e1000_phy_info *phy = &hw->phy; - u32 i, mdic = 0; + bool success; if (offset > MAX_PHY_REG_ADDRESS) { e_dbg("PHY Address %d is out of range\n", offset); return -E1000_ERR_PARAM; } + retry_max = phy->retry_enabled ? phy->retry_count : 0; + /* Set up Op-code, Phy Address, and register offset in the MDI * Control register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. 
*/ - mdic = (((u32)data) | - (offset << E1000_MDIC_REG_SHIFT) | - (phy->addr << E1000_MDIC_PHY_SHIFT) | - (E1000_MDIC_OP_WRITE)); + for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) { + success = true; - ew32(MDIC, mdic); + mdic = (((u32)data) | + (offset << E1000_MDIC_REG_SHIFT) | + (phy->addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_WRITE)); - /* Poll the ready bit to see if the MDI read completed - * Increasing the time out as testing showed failures with - * the lower time out - */ - for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - udelay(50); - mdic = er32(MDIC); - if (mdic & E1000_MDIC_READY) - break; - } - if (!(mdic & E1000_MDIC_READY)) { - e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset); - return -E1000_ERR_PHY; - } - if (mdic & E1000_MDIC_ERROR) { - e_dbg("MDI Write PHY Red Address %d Error\n", offset); - return -E1000_ERR_PHY; - } - if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { - e_dbg("MDI Write offset error - requested %d, returned %d\n", - offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); - return -E1000_ERR_PHY; - } + ew32(MDIC, mdic); - /* Allow some time after each MDIC transaction to avoid - * reading duplicate data in the next MDIC transaction. - */ - if (hw->mac.type == e1000_pch2lan) - udelay(100); + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { + usleep_range(50, 60); + mdic = er32(MDIC); + if (mdic & E1000_MDIC_READY) + break; + } + if (!(mdic & E1000_MDIC_READY)) { + e_dbg("MDI Write PHY Reg Address %d did not complete\n", + offset); + success = false; + } + if (mdic & E1000_MDIC_ERROR) { + e_dbg("MDI Write PHY Reg Address %d Error\n", offset); + success = false; + } + if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { + e_dbg("MDI Write offset error - requested %d, returned %d\n", + offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); + success = false; + } - return 0; + /* Allow some time after each MDIC transaction to avoid + * reading duplicate data in the next MDIC transaction. 
+ */ + if (hw->mac.type == e1000_pch2lan) + usleep_range(100, 150); + + if (success) + return 0; + + if (retry_counter != retry_max) { + e_dbg("Perform retry on PHY transaction...\n"); + mdelay(10); + } + } + + return -E1000_ERR_PHY; } /** diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h index c48777d095235..049bb325b4b14 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.h +++ b/drivers/net/ethernet/intel/e1000e/phy.h @@ -51,6 +51,8 @@ s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data); s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data); void e1000_power_up_phy_copper(struct e1000_hw *hw); void e1000_power_down_phy_copper(struct e1000_hw *hw); +void e1000e_disable_phy_retry(struct e1000_hw *hw); +void e1000e_enable_phy_retry(struct e1000_hw *hw); s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index d748b98274e79..92de609b72189 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2342,7 +2342,7 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) * suspend or hibernation. This function does not need to handle lower PCIe * device state as the stack takes care of that for us. **/ -static int __maybe_unused fm10k_resume(struct device *dev) +static int fm10k_resume(struct device *dev) { struct fm10k_intfc *interface = dev_get_drvdata(dev); struct net_device *netdev = interface->netdev; @@ -2369,7 +2369,7 @@ static int __maybe_unused fm10k_resume(struct device *dev) * system suspend or hibernation. This function does not need to handle lower * PCIe device state as the stack takes care of that for us. 
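 * As in the e100, e1000, and e1000e conversions above, dropping
 * __maybe_unused is safe here: DEFINE_SIMPLE_DEV_PM_OPS() plus
 * pm_sleep_ptr() keep these callbacks visible to the compiler even when
 * CONFIG_PM_SLEEP is off, so they are discarded without preprocessor
 * guards or unused-function warnings.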
**/ -static int __maybe_unused fm10k_suspend(struct device *dev) +static int fm10k_suspend(struct device *dev) { struct fm10k_intfc *interface = dev_get_drvdata(dev); struct net_device *netdev = interface->netdev; @@ -2502,16 +2502,14 @@ static const struct pci_error_handlers fm10k_err_handler = { .reset_done = fm10k_io_reset_done, }; -static SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume); static struct pci_driver fm10k_driver = { .name = fm10k_driver_name, .id_table = fm10k_pci_tbl, .probe = fm10k_probe, .remove = fm10k_remove, - .driver = { - .pm = &fm10k_pm_ops, - }, + .driver.pm = pm_sleep_ptr(&fm10k_pm_ops), .sriov_configure = fm10k_iov_configure, .err_handler = &fm10k_err_handler }; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index ba24f3fa92c37..2fbabcdb5bb5f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -955,6 +955,7 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; + bool in_busy_poll; int irq_num; /* IRQ assigned to this q_vector */ } ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f86578857e8ae..0bdcdea0be3ec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1253,8 +1253,11 @@ int i40e_count_filters(struct i40e_vsi *vsi) int bkt; int cnt = 0; - hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) - ++cnt; + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_ACTIVE) + ++cnt; + } return cnt; } @@ -3911,6 +3914,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; + /* Set ITR for software interrupts triggered after exiting + * busy-loop polling. 
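+	 * Programming I40E_ITR_20K on the I40E_SW_ITR index rate-limits
+	 * those software interrupts to roughly 20K per second, so frequent
+	 * busy-poll exits cannot flood the vector.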
+ */ + wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1), + I40E_ITR_20K); + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); @@ -13108,13 +13117,9 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - __u16 mode; - - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { + __u16 mode = nla_get_u16(attr); - mode = nla_get_u16(attr); if ((mode != BRIDGE_MODE_VEPA) && (mode != BRIDGE_MODE_VEB)) return -EINVAL; @@ -16509,7 +16514,7 @@ static void i40e_shutdown(struct pci_dev *pdev) * i40e_suspend - PM callback for moving to D3 * @dev: generic device information structure **/ -static int __maybe_unused i40e_suspend(struct device *dev) +static int i40e_suspend(struct device *dev) { struct i40e_pf *pf = dev_get_drvdata(dev); struct i40e_hw *hw = &pf->hw; @@ -16560,7 +16565,7 @@ static int __maybe_unused i40e_suspend(struct device *dev) * i40e_resume - PM callback for waking up from D3 * @dev: generic device information structure **/ -static int __maybe_unused i40e_resume(struct device *dev) +static int i40e_resume(struct device *dev) { struct i40e_pf *pf = dev_get_drvdata(dev); int err; @@ -16606,16 +16611,14 @@ static const struct pci_error_handlers i40e_err_handler = { .resume = i40e_pci_error_resume, }; -static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume); static struct pci_driver i40e_driver = { .name = i40e_driver_name, .id_table = i40e_pci_tbl, .probe = i40e_probe, .remove = i40e_remove, - .driver = { - .pm = &i40e_pm_ops, - }, + .driver.pm = pm_sleep_ptr(&i40e_pm_ops), .shutdown = i40e_shutdown, .err_handler = &i40e_err_handler, .sriov_configure = i40e_pci_sriov_configure, diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 605fd82f5d20f..7f0936f4e05e2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -734,37 +734,7 @@ int i40e_validate_nvm_checksum(struct i40e_hw *hw, return ret_code; } -static int i40e_nvmupd_state_init(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_state_reading(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_state_writing(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *errno); -static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - int *perrno); -static int i40e_nvmupd_nvm_erase(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - int *perrno); -static int i40e_nvmupd_nvm_write(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_nvm_read(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_exec_aq(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_get_aq_result(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_get_aq_event(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static inline u8 i40e_nvmupd_get_module(u32 val) +static u8 i40e_nvmupd_get_module(u32 val) { return (u8)(val & I40E_NVM_MOD_PNT_MASK); } @@ -799,146 +769,433 @@ static const char * const i40e_nvm_update_state_str[] 
= { }; /** - * i40e_nvmupd_command - Process an NVM update command + * i40e_nvmupd_validate_command - Validate given command * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command - * @bytes: pointer to the data buffer + * @cmd: pointer to nvm update command buffer * @perrno: pointer to return error code * - * Dispatches command depending on what update state is current + * Return one of the valid command types or I40E_NVMUPD_INVALID **/ -int i40e_nvmupd_command(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) +static enum i40e_nvmupd_cmd +i40e_nvmupd_validate_command(struct i40e_hw *hw, struct i40e_nvm_access *cmd, + int *perrno) { enum i40e_nvmupd_cmd upd_cmd; - int status; - - /* assume success */ - *perrno = 0; + u8 module, transaction; - /* early check for status command and debug msgs */ - upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); + /* anything that doesn't match a recognized case is an error */ + upd_cmd = I40E_NVMUPD_INVALID; - i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d opc 0x%04x cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", - i40e_nvm_update_state_str[upd_cmd], - hw->nvmupd_state, - hw->nvm_release_on_done, hw->nvm_wait_opcode, - cmd->command, cmd->config, cmd->offset, cmd->data_size); + transaction = i40e_nvmupd_get_transaction(cmd->config); + module = i40e_nvmupd_get_module(cmd->config); - if (upd_cmd == I40E_NVMUPD_INVALID) { - *perrno = -EFAULT; + /* limits on data size */ + if (cmd->data_size < 1 || cmd->data_size > I40E_NVMUPD_MAX_DATA) { i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_validate_command returns %d errno %d\n", - upd_cmd, *perrno); + "%s data_size %d\n", __func__, cmd->data_size); + *perrno = -EFAULT; + return I40E_NVMUPD_INVALID; } - /* a status request returns immediately rather than - * going into the state machine - */ - if (upd_cmd == I40E_NVMUPD_STATUS) { - if (!cmd->data_size) { - *perrno = -EFAULT; - return -EINVAL; + switch (cmd->command) { + case I40E_NVM_READ: + switch (transaction) { + case I40E_NVM_CON: + upd_cmd = I40E_NVMUPD_READ_CON; + break; + case I40E_NVM_SNT: + upd_cmd = I40E_NVMUPD_READ_SNT; + break; + case I40E_NVM_LCB: + upd_cmd = I40E_NVMUPD_READ_LCB; + break; + case I40E_NVM_SA: + upd_cmd = I40E_NVMUPD_READ_SA; + break; + case I40E_NVM_EXEC: + if (module == 0xf) + upd_cmd = I40E_NVMUPD_STATUS; + else if (module == 0) + upd_cmd = I40E_NVMUPD_GET_AQ_RESULT; + break; + case I40E_NVM_AQE: + upd_cmd = I40E_NVMUPD_GET_AQ_EVENT; + break; } + break; - bytes[0] = hw->nvmupd_state; - - if (cmd->data_size >= 4) { - bytes[1] = 0; - *((u16 *)&bytes[2]) = hw->nvm_wait_opcode; + case I40E_NVM_WRITE: + switch (transaction) { + case I40E_NVM_CON: + upd_cmd = I40E_NVMUPD_WRITE_CON; + break; + case I40E_NVM_SNT: + upd_cmd = I40E_NVMUPD_WRITE_SNT; + break; + case I40E_NVM_LCB: + upd_cmd = I40E_NVMUPD_WRITE_LCB; + break; + case I40E_NVM_SA: + upd_cmd = I40E_NVMUPD_WRITE_SA; + break; + case I40E_NVM_ERA: + upd_cmd = I40E_NVMUPD_WRITE_ERA; + break; + case I40E_NVM_CSUM: + upd_cmd = I40E_NVMUPD_CSUM_CON; + break; + case (I40E_NVM_CSUM | I40E_NVM_SA): + upd_cmd = I40E_NVMUPD_CSUM_SA; + break; + case (I40E_NVM_CSUM | I40E_NVM_LCB): + upd_cmd = I40E_NVMUPD_CSUM_LCB; + break; + case I40E_NVM_EXEC: + if (module == 0) + upd_cmd = I40E_NVMUPD_EXEC_AQ; + break; } + break; + } - /* Clear error status on read */ - if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) - hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + return upd_cmd; +} - return 0; - } +/** + * 
i40e_nvmupd_nvm_erase - Erase an NVM module + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @perrno: pointer to return error code + * + * module, offset, data_size and data are in cmd structure + **/ +static int i40e_nvmupd_nvm_erase(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + int *perrno) +{ + struct i40e_asq_cmd_details cmd_details; + u8 module, transaction; + int status = 0; + bool last; - /* Clear status even it is not read and log */ - if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) { + transaction = i40e_nvmupd_get_transaction(cmd->config); + module = i40e_nvmupd_get_module(cmd->config); + last = (transaction & I40E_NVM_LCB); + + memset(&cmd_details, 0, sizeof(cmd_details)); + cmd_details.wb_desc = &hw->nvm_wb_desc; + + status = i40e_aq_erase_nvm(hw, module, cmd->offset, (u16)cmd->data_size, + last, &cmd_details); + if (status) { i40e_debug(hw, I40E_DEBUG_NVM, - "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n"); - hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + "%s mod 0x%x off 0x%x len 0x%x\n", + __func__, module, cmd->offset, cmd->data_size); + i40e_debug(hw, I40E_DEBUG_NVM, + "%s status %d aq %d\n", + __func__, status, hw->aq.asq_last_status); + *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); } - /* Acquire lock to prevent race condition where adminq_task - * can execute after i40e_nvmupd_nvm_read/write but before state - * variables (nvm_wait_opcode, nvm_release_on_done) are updated. - * - * During NVMUpdate, it is observed that lock could be held for - * ~5ms for most commands. However lock is held for ~60ms for - * NVMUPD_CSUM_LCB command. - */ - mutex_lock(&hw->aq.arq_mutex); - switch (hw->nvmupd_state) { - case I40E_NVMUPD_STATE_INIT: - status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno); - break; - - case I40E_NVMUPD_STATE_READING: - status = i40e_nvmupd_state_reading(hw, cmd, bytes, perrno); - break; + return status; +} - case I40E_NVMUPD_STATE_WRITING: - status = i40e_nvmupd_state_writing(hw, cmd, bytes, perrno); - break; +/** + * i40e_nvmupd_nvm_write - Write NVM + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * module, offset, data_size and data are in cmd structure + **/ +static int i40e_nvmupd_nvm_write(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + struct i40e_asq_cmd_details cmd_details; + u8 module, transaction; + u8 preservation_flags; + int status = 0; + bool last; - case I40E_NVMUPD_STATE_INIT_WAIT: - case I40E_NVMUPD_STATE_WRITE_WAIT: - /* if we need to stop waiting for an event, clear - * the wait info and return before doing anything else - */ - if (cmd->offset == 0xffff) { - i40e_nvmupd_clear_wait_state(hw); - status = 0; - break; - } + transaction = i40e_nvmupd_get_transaction(cmd->config); + module = i40e_nvmupd_get_module(cmd->config); + last = (transaction & I40E_NVM_LCB); + preservation_flags = i40e_nvmupd_get_preservation_flags(cmd->config); - status = -EBUSY; - *perrno = -EBUSY; - break; + memset(&cmd_details, 0, sizeof(cmd_details)); + cmd_details.wb_desc = &hw->nvm_wb_desc; - default: - /* invalid state, should never happen */ + status = i40e_aq_update_nvm(hw, module, cmd->offset, + (u16)cmd->data_size, bytes, last, + preservation_flags, &cmd_details); + if (status) { i40e_debug(hw, I40E_DEBUG_NVM, - "NVMUPD: no such state %d\n", hw->nvmupd_state); - status = -EOPNOTSUPP; - *perrno = -ESRCH; - break; + "%s mod 0x%x 
off 0x%x len 0x%x\n", + __func__, module, cmd->offset, cmd->data_size); + i40e_debug(hw, I40E_DEBUG_NVM, + "%s status %d aq %d\n", + __func__, status, hw->aq.asq_last_status); + *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); } - mutex_unlock(&hw->aq.arq_mutex); return status; } /** - * i40e_nvmupd_state_init - Handle NVM update state Init + * i40e_nvmupd_nvm_read - Read NVM * @hw: pointer to hardware structure * @cmd: pointer to nvm update command buffer * @bytes: pointer to the data buffer * @perrno: pointer to return error code * - * Process legitimate commands of the Init state and conditionally set next - * state. Reject all other commands. + * cmd structure contains identifiers and data buffer **/ -static int i40e_nvmupd_state_init(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) +static int i40e_nvmupd_nvm_read(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) { - enum i40e_nvmupd_cmd upd_cmd; - int status = 0; + struct i40e_asq_cmd_details cmd_details; + u8 module, transaction; + int status; + bool last; - upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); + transaction = i40e_nvmupd_get_transaction(cmd->config); + module = i40e_nvmupd_get_module(cmd->config); + last = (transaction == I40E_NVM_LCB) || (transaction == I40E_NVM_SA); - switch (upd_cmd) { - case I40E_NVMUPD_READ_SA: - status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); - if (status) { - *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); - } else { + memset(&cmd_details, 0, sizeof(cmd_details)); + cmd_details.wb_desc = &hw->nvm_wb_desc; + + status = i40e_aq_read_nvm(hw, module, cmd->offset, (u16)cmd->data_size, + bytes, last, &cmd_details); + if (status) { + i40e_debug(hw, I40E_DEBUG_NVM, + "%s mod 0x%x off 0x%x len 0x%x\n", + __func__, module, cmd->offset, cmd->data_size); + i40e_debug(hw, I40E_DEBUG_NVM, + "%s status %d aq %d\n", + __func__, status, hw->aq.asq_last_status); + *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + } + + return status; +} + +/** + * i40e_nvmupd_exec_aq - Run an AQ command + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * cmd structure contains identifiers and data buffer + **/ +static int i40e_nvmupd_exec_aq(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + struct i40e_asq_cmd_details cmd_details; + struct i40e_aq_desc *aq_desc; + u32 buff_size = 0; + u8 *buff = NULL; + u32 aq_desc_len; + u32 aq_data_len; + int status; + + i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); + if (cmd->offset == 0xffff) + return 0; + + memset(&cmd_details, 0, sizeof(cmd_details)); + cmd_details.wb_desc = &hw->nvm_wb_desc; + + aq_desc_len = sizeof(struct i40e_aq_desc); + memset(&hw->nvm_wb_desc, 0, aq_desc_len); + + /* get the aq descriptor */ + if (cmd->data_size < aq_desc_len) { + i40e_debug(hw, I40E_DEBUG_NVM, + "NVMUPD: not enough aq desc bytes for exec, size %d < %d\n", + cmd->data_size, aq_desc_len); + *perrno = -EINVAL; + return -EINVAL; + } + aq_desc = (struct i40e_aq_desc *)bytes; + + /* if data buffer needed, make sure it's ready */ + aq_data_len = cmd->data_size - aq_desc_len; + buff_size = max_t(u32, aq_data_len, le16_to_cpu(aq_desc->datalen)); + if (buff_size) { + if (!hw->nvm_buff.va) { + status = i40e_allocate_virt_mem(hw, &hw->nvm_buff, + hw->aq.asq_buf_size); + if (status) + i40e_debug(hw, I40E_DEBUG_NVM, + "NVMUPD: 
i40e_allocate_virt_mem for exec buff failed, %d\n", + status); + } + + if (hw->nvm_buff.va) { + buff = hw->nvm_buff.va; + memcpy(buff, &bytes[aq_desc_len], aq_data_len); + } + } + + if (cmd->offset) + memset(&hw->nvm_aq_event_desc, 0, aq_desc_len); + + /* and away we go! */ + status = i40e_asq_send_command(hw, aq_desc, buff, + buff_size, &cmd_details); + if (status) { + i40e_debug(hw, I40E_DEBUG_NVM, + "%s err %pe aq_err %s\n", + __func__, ERR_PTR(status), + i40e_aq_str(hw, hw->aq.asq_last_status)); + *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + return status; + } + + /* should we wait for a followup event? */ + if (cmd->offset) { + hw->nvm_wait_opcode = cmd->offset; + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; + } + + return status; +} + +/** + * i40e_nvmupd_get_aq_result - Get the results from the previous exec_aq + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * cmd structure contains identifiers and data buffer + **/ +static int i40e_nvmupd_get_aq_result(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + u32 aq_total_len; + u32 aq_desc_len; + int remainder; + u8 *buff; + + i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); + + aq_desc_len = sizeof(struct i40e_aq_desc); + aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_wb_desc.datalen); + + /* check offset range */ + if (cmd->offset > aq_total_len) { + i40e_debug(hw, I40E_DEBUG_NVM, "%s: offset too big %d > %d\n", + __func__, cmd->offset, aq_total_len); + *perrno = -EINVAL; + return -EINVAL; + } + + /* check copylength range */ + if (cmd->data_size > (aq_total_len - cmd->offset)) { + int new_len = aq_total_len - cmd->offset; + + i40e_debug(hw, I40E_DEBUG_NVM, "%s: copy length %d too big, trimming to %d\n", + __func__, cmd->data_size, new_len); + cmd->data_size = new_len; + } + + remainder = cmd->data_size; + if (cmd->offset < aq_desc_len) { + u32 len = aq_desc_len - cmd->offset; + + len = min(len, cmd->data_size); + i40e_debug(hw, I40E_DEBUG_NVM, "%s: aq_desc bytes %d to %d\n", + __func__, cmd->offset, cmd->offset + len); + + buff = ((u8 *)&hw->nvm_wb_desc) + cmd->offset; + memcpy(bytes, buff, len); + + bytes += len; + remainder -= len; + buff = hw->nvm_buff.va; + } else { + buff = hw->nvm_buff.va + (cmd->offset - aq_desc_len); + } + + if (remainder > 0) { + int start_byte = buff - (u8 *)hw->nvm_buff.va; + + i40e_debug(hw, I40E_DEBUG_NVM, "%s: databuf bytes %d to %d\n", + __func__, start_byte, start_byte + remainder); + memcpy(bytes, buff, remainder); + } + + return 0; +} + +/** + * i40e_nvmupd_get_aq_event - Get the Admin Queue event from previous exec_aq + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * cmd structure contains identifiers and data buffer + **/ +static int i40e_nvmupd_get_aq_event(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + u32 aq_total_len; + u32 aq_desc_len; + + i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); + + aq_desc_len = sizeof(struct i40e_aq_desc); + aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_aq_event_desc.datalen); + + /* check copylength range */ + if (cmd->data_size > aq_total_len) { + i40e_debug(hw, I40E_DEBUG_NVM, + "%s: copy length %d too big, trimming to %d\n", + __func__, cmd->data_size, aq_total_len); + cmd->data_size = aq_total_len; + } + + 
memcpy(bytes, &hw->nvm_aq_event_desc, cmd->data_size); + + return 0; +} + +/** + * i40e_nvmupd_state_init - Handle NVM update state Init + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * Process legitimate commands of the Init state and conditionally set next + * state. Reject all other commands. + **/ +static int i40e_nvmupd_state_init(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + enum i40e_nvmupd_cmd upd_cmd; + int status = 0; + + upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); + + switch (upd_cmd) { + case I40E_NVMUPD_READ_SA: + status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); + if (status) { + *perrno = i40e_aq_rc_to_posix(status, + hw->aq.asq_last_status); + } else { status = i40e_nvmupd_nvm_read(hw, cmd, bytes, perrno); i40e_release_nvm(hw); } @@ -948,7 +1205,7 @@ static int i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); if (status) { *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); + hw->aq.asq_last_status); } else { status = i40e_nvmupd_nvm_read(hw, cmd, bytes, perrno); if (status) @@ -962,7 +1219,7 @@ static int i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE); if (status) { *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); + hw->aq.asq_last_status); } else { status = i40e_nvmupd_nvm_erase(hw, cmd, perrno); if (status) { @@ -979,7 +1236,7 @@ static int i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE); if (status) { *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); + hw->aq.asq_last_status); } else { status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno); if (status) { @@ -996,7 +1253,7 @@ static int i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE); if (status) { *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); + hw->aq.asq_last_status); } else { status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno); if (status) { @@ -1012,7 +1269,7 @@ static int i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE); if (status) { *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); + hw->aq.asq_last_status); } else { status = i40e_update_nvm_checksum(hw); if (status) { @@ -1185,7 +1442,7 @@ static int i40e_nvmupd_state_writing(struct i40e_hw *hw, * so here we try to reacquire the semaphore then retry the write. * We only do one retry, then give up. 
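 * The first attempt's status and asq_last_status are saved below so they
 * can be restored if the semaphore cannot be reacquired for the retry.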
*/ - if (status && (hw->aq.asq_last_status == I40E_AQ_RC_EBUSY) && + if (status && hw->aq.asq_last_status == I40E_AQ_RC_EBUSY && !retry_attempt) { u32 old_asq_status = hw->aq.asq_last_status; int old_status = status; @@ -1215,457 +1472,168 @@ static int i40e_nvmupd_state_writing(struct i40e_hw *hw, } /** - * i40e_nvmupd_clear_wait_state - clear wait state on hw - * @hw: pointer to the hardware structure + * i40e_nvmupd_command - Process an NVM update command + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * Dispatches command depending on what update state is current **/ -void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw) +int i40e_nvmupd_command(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) { - i40e_debug(hw, I40E_DEBUG_NVM, - "NVMUPD: clearing wait on opcode 0x%04x\n", - hw->nvm_wait_opcode); - - if (hw->nvm_release_on_done) { - i40e_release_nvm(hw); - hw->nvm_release_on_done = false; - } - hw->nvm_wait_opcode = 0; + enum i40e_nvmupd_cmd upd_cmd; + int status; - if (hw->aq.arq_last_status) { - hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR; - return; - } + /* assume success */ + *perrno = 0; - switch (hw->nvmupd_state) { - case I40E_NVMUPD_STATE_INIT_WAIT: - hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; - break; + /* early check for status command and debug msgs */ + upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); - case I40E_NVMUPD_STATE_WRITE_WAIT: - hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING; - break; + i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d opc 0x%04x cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", + i40e_nvm_update_state_str[upd_cmd], + hw->nvmupd_state, + hw->nvm_release_on_done, hw->nvm_wait_opcode, + cmd->command, cmd->config, cmd->offset, cmd->data_size); - default: - break; + if (upd_cmd == I40E_NVMUPD_INVALID) { + *perrno = -EFAULT; + i40e_debug(hw, I40E_DEBUG_NVM, + "i40e_nvmupd_validate_command returns %d errno %d\n", + upd_cmd, *perrno); } -} -/** - * i40e_nvmupd_check_wait_event - handle NVM update operation events - * @hw: pointer to the hardware structure - * @opcode: the event that just happened - * @desc: AdminQ descriptor - **/ -void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode, - struct i40e_aq_desc *desc) -{ - u32 aq_desc_len = sizeof(struct i40e_aq_desc); + /* a status request returns immediately rather than + * going into the state machine + */ + if (upd_cmd == I40E_NVMUPD_STATUS) { + if (!cmd->data_size) { + *perrno = -EFAULT; + return -EINVAL; + } - if (opcode == hw->nvm_wait_opcode) { - memcpy(&hw->nvm_aq_event_desc, desc, aq_desc_len); - i40e_nvmupd_clear_wait_state(hw); - } -} + bytes[0] = hw->nvmupd_state; -/** - * i40e_nvmupd_validate_command - Validate given command - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @perrno: pointer to return error code - * - * Return one of the valid command types or I40E_NVMUPD_INVALID - **/ -static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - int *perrno) -{ - enum i40e_nvmupd_cmd upd_cmd; - u8 module, transaction; + if (cmd->data_size >= 4) { + bytes[1] = 0; + *((u16 *)&bytes[2]) = hw->nvm_wait_opcode; + } - /* anything that doesn't match a recognized case is an error */ - upd_cmd = I40E_NVMUPD_INVALID; + /* Clear error status on read */ + if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) + hw->nvmupd_state = 
I40E_NVMUPD_STATE_INIT; - transaction = i40e_nvmupd_get_transaction(cmd->config); - module = i40e_nvmupd_get_module(cmd->config); + return 0; + } - /* limits on data size */ - if ((cmd->data_size < 1) || - (cmd->data_size > I40E_NVMUPD_MAX_DATA)) { + /* Clear status even it is not read and log */ + if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) { i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_validate_command data_size %d\n", - cmd->data_size); - *perrno = -EFAULT; - return I40E_NVMUPD_INVALID; + "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n"); + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; } - switch (cmd->command) { - case I40E_NVM_READ: - switch (transaction) { - case I40E_NVM_CON: - upd_cmd = I40E_NVMUPD_READ_CON; - break; - case I40E_NVM_SNT: - upd_cmd = I40E_NVMUPD_READ_SNT; - break; - case I40E_NVM_LCB: - upd_cmd = I40E_NVMUPD_READ_LCB; - break; - case I40E_NVM_SA: - upd_cmd = I40E_NVMUPD_READ_SA; - break; - case I40E_NVM_EXEC: - if (module == 0xf) - upd_cmd = I40E_NVMUPD_STATUS; - else if (module == 0) - upd_cmd = I40E_NVMUPD_GET_AQ_RESULT; - break; - case I40E_NVM_AQE: - upd_cmd = I40E_NVMUPD_GET_AQ_EVENT; - break; - } + /* Acquire lock to prevent race condition where adminq_task + * can execute after i40e_nvmupd_nvm_read/write but before state + * variables (nvm_wait_opcode, nvm_release_on_done) are updated. + * + * During NVMUpdate, it is observed that lock could be held for + * ~5ms for most commands. However lock is held for ~60ms for + * NVMUPD_CSUM_LCB command. + */ + mutex_lock(&hw->aq.arq_mutex); + switch (hw->nvmupd_state) { + case I40E_NVMUPD_STATE_INIT: + status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno); break; - case I40E_NVM_WRITE: - switch (transaction) { - case I40E_NVM_CON: - upd_cmd = I40E_NVMUPD_WRITE_CON; - break; - case I40E_NVM_SNT: - upd_cmd = I40E_NVMUPD_WRITE_SNT; - break; - case I40E_NVM_LCB: - upd_cmd = I40E_NVMUPD_WRITE_LCB; - break; - case I40E_NVM_SA: - upd_cmd = I40E_NVMUPD_WRITE_SA; - break; - case I40E_NVM_ERA: - upd_cmd = I40E_NVMUPD_WRITE_ERA; - break; - case I40E_NVM_CSUM: - upd_cmd = I40E_NVMUPD_CSUM_CON; - break; - case (I40E_NVM_CSUM|I40E_NVM_SA): - upd_cmd = I40E_NVMUPD_CSUM_SA; - break; - case (I40E_NVM_CSUM|I40E_NVM_LCB): - upd_cmd = I40E_NVMUPD_CSUM_LCB; - break; - case I40E_NVM_EXEC: - if (module == 0) - upd_cmd = I40E_NVMUPD_EXEC_AQ; - break; - } + case I40E_NVMUPD_STATE_READING: + status = i40e_nvmupd_state_reading(hw, cmd, bytes, perrno); break; - } - - return upd_cmd; -} - -/** - * i40e_nvmupd_exec_aq - Run an AQ command - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @bytes: pointer to the data buffer - * @perrno: pointer to return error code - * - * cmd structure contains identifiers and data buffer - **/ -static int i40e_nvmupd_exec_aq(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) -{ - struct i40e_asq_cmd_details cmd_details; - struct i40e_aq_desc *aq_desc; - u32 buff_size = 0; - u8 *buff = NULL; - u32 aq_desc_len; - u32 aq_data_len; - int status; - - i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); - if (cmd->offset == 0xffff) - return 0; - - memset(&cmd_details, 0, sizeof(cmd_details)); - cmd_details.wb_desc = &hw->nvm_wb_desc; - aq_desc_len = sizeof(struct i40e_aq_desc); - memset(&hw->nvm_wb_desc, 0, aq_desc_len); - - /* get the aq descriptor */ - if (cmd->data_size < aq_desc_len) { - i40e_debug(hw, I40E_DEBUG_NVM, - "NVMUPD: not enough aq desc bytes for exec, size %d < %d\n", - cmd->data_size, aq_desc_len); - *perrno = -EINVAL; - 
return -EINVAL; - } - aq_desc = (struct i40e_aq_desc *)bytes; - - /* if data buffer needed, make sure it's ready */ - aq_data_len = cmd->data_size - aq_desc_len; - buff_size = max_t(u32, aq_data_len, le16_to_cpu(aq_desc->datalen)); - if (buff_size) { - if (!hw->nvm_buff.va) { - status = i40e_allocate_virt_mem(hw, &hw->nvm_buff, - hw->aq.asq_buf_size); - if (status) - i40e_debug(hw, I40E_DEBUG_NVM, - "NVMUPD: i40e_allocate_virt_mem for exec buff failed, %d\n", - status); - } + case I40E_NVMUPD_STATE_WRITING: + status = i40e_nvmupd_state_writing(hw, cmd, bytes, perrno); + break; - if (hw->nvm_buff.va) { - buff = hw->nvm_buff.va; - memcpy(buff, &bytes[aq_desc_len], aq_data_len); + case I40E_NVMUPD_STATE_INIT_WAIT: + case I40E_NVMUPD_STATE_WRITE_WAIT: + /* if we need to stop waiting for an event, clear + * the wait info and return before doing anything else + */ + if (cmd->offset == 0xffff) { + i40e_nvmupd_clear_wait_state(hw); + status = 0; + break; } - } - if (cmd->offset) - memset(&hw->nvm_aq_event_desc, 0, aq_desc_len); + status = -EBUSY; + *perrno = -EBUSY; + break; - /* and away we go! */ - status = i40e_asq_send_command(hw, aq_desc, buff, - buff_size, &cmd_details); - if (status) { + default: + /* invalid state, should never happen */ i40e_debug(hw, I40E_DEBUG_NVM, - "%s err %pe aq_err %s\n", - __func__, ERR_PTR(status), - i40e_aq_str(hw, hw->aq.asq_last_status)); - *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); - return status; - } - - /* should we wait for a followup event? */ - if (cmd->offset) { - hw->nvm_wait_opcode = cmd->offset; - hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; + "NVMUPD: no such state %d\n", hw->nvmupd_state); + status = -EOPNOTSUPP; + *perrno = -ESRCH; + break; } + mutex_unlock(&hw->aq.arq_mutex); return status; } /** - * i40e_nvmupd_get_aq_result - Get the results from the previous exec_aq - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @bytes: pointer to the data buffer - * @perrno: pointer to return error code - * - * cmd structure contains identifiers and data buffer - **/ -static int i40e_nvmupd_get_aq_result(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) -{ - u32 aq_total_len; - u32 aq_desc_len; - int remainder; - u8 *buff; - - i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); - - aq_desc_len = sizeof(struct i40e_aq_desc); - aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_wb_desc.datalen); - - /* check offset range */ - if (cmd->offset > aq_total_len) { - i40e_debug(hw, I40E_DEBUG_NVM, "%s: offset too big %d > %d\n", - __func__, cmd->offset, aq_total_len); - *perrno = -EINVAL; - return -EINVAL; - } - - /* check copylength range */ - if (cmd->data_size > (aq_total_len - cmd->offset)) { - int new_len = aq_total_len - cmd->offset; - - i40e_debug(hw, I40E_DEBUG_NVM, "%s: copy length %d too big, trimming to %d\n", - __func__, cmd->data_size, new_len); - cmd->data_size = new_len; - } - - remainder = cmd->data_size; - if (cmd->offset < aq_desc_len) { - u32 len = aq_desc_len - cmd->offset; - - len = min(len, cmd->data_size); - i40e_debug(hw, I40E_DEBUG_NVM, "%s: aq_desc bytes %d to %d\n", - __func__, cmd->offset, cmd->offset + len); - - buff = ((u8 *)&hw->nvm_wb_desc) + cmd->offset; - memcpy(bytes, buff, len); - - bytes += len; - remainder -= len; - buff = hw->nvm_buff.va; - } else { - buff = hw->nvm_buff.va + (cmd->offset - aq_desc_len); - } - - if (remainder > 0) { - int start_byte = buff - (u8 *)hw->nvm_buff.va; - - i40e_debug(hw, I40E_DEBUG_NVM, "%s: databuf bytes 
%d to %d\n", - __func__, start_byte, start_byte + remainder); - memcpy(bytes, buff, remainder); - } - - return 0; -} - -/** - * i40e_nvmupd_get_aq_event - Get the Admin Queue event from previous exec_aq - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @bytes: pointer to the data buffer - * @perrno: pointer to return error code - * - * cmd structure contains identifiers and data buffer + * i40e_nvmupd_clear_wait_state - clear wait state on hw + * @hw: pointer to the hardware structure **/ -static int i40e_nvmupd_get_aq_event(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) +void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw) { - u32 aq_total_len; - u32 aq_desc_len; - - i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); - - aq_desc_len = sizeof(struct i40e_aq_desc); - aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_aq_event_desc.datalen); + i40e_debug(hw, I40E_DEBUG_NVM, + "NVMUPD: clearing wait on opcode 0x%04x\n", + hw->nvm_wait_opcode); - /* check copylength range */ - if (cmd->data_size > aq_total_len) { - i40e_debug(hw, I40E_DEBUG_NVM, - "%s: copy length %d too big, trimming to %d\n", - __func__, cmd->data_size, aq_total_len); - cmd->data_size = aq_total_len; + if (hw->nvm_release_on_done) { + i40e_release_nvm(hw); + hw->nvm_release_on_done = false; } + hw->nvm_wait_opcode = 0; - memcpy(bytes, &hw->nvm_aq_event_desc, cmd->data_size); - - return 0; -} - -/** - * i40e_nvmupd_nvm_read - Read NVM - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @bytes: pointer to the data buffer - * @perrno: pointer to return error code - * - * cmd structure contains identifiers and data buffer - **/ -static int i40e_nvmupd_nvm_read(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) -{ - struct i40e_asq_cmd_details cmd_details; - u8 module, transaction; - int status; - bool last; - - transaction = i40e_nvmupd_get_transaction(cmd->config); - module = i40e_nvmupd_get_module(cmd->config); - last = (transaction == I40E_NVM_LCB) || (transaction == I40E_NVM_SA); - - memset(&cmd_details, 0, sizeof(cmd_details)); - cmd_details.wb_desc = &hw->nvm_wb_desc; - - status = i40e_aq_read_nvm(hw, module, cmd->offset, (u16)cmd->data_size, - bytes, last, &cmd_details); - if (status) { - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_read mod 0x%x off 0x%x len 0x%x\n", - module, cmd->offset, cmd->data_size); - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_read status %d aq %d\n", - status, hw->aq.asq_last_status); - *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + if (hw->aq.arq_last_status) { + hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR; + return; } - return status; -} - -/** - * i40e_nvmupd_nvm_erase - Erase an NVM module - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @perrno: pointer to return error code - * - * module, offset, data_size and data are in cmd structure - **/ -static int i40e_nvmupd_nvm_erase(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - int *perrno) -{ - struct i40e_asq_cmd_details cmd_details; - u8 module, transaction; - int status = 0; - bool last; - - transaction = i40e_nvmupd_get_transaction(cmd->config); - module = i40e_nvmupd_get_module(cmd->config); - last = (transaction & I40E_NVM_LCB); + switch (hw->nvmupd_state) { + case I40E_NVMUPD_STATE_INIT_WAIT: + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + break; - memset(&cmd_details, 0, sizeof(cmd_details)); - cmd_details.wb_desc = 
&hw->nvm_wb_desc; + case I40E_NVMUPD_STATE_WRITE_WAIT: + hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING; + break; - status = i40e_aq_erase_nvm(hw, module, cmd->offset, (u16)cmd->data_size, - last, &cmd_details); - if (status) { - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_erase mod 0x%x off 0x%x len 0x%x\n", - module, cmd->offset, cmd->data_size); - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_erase status %d aq %d\n", - status, hw->aq.asq_last_status); - *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + default: + break; } - - return status; } /** - * i40e_nvmupd_nvm_write - Write NVM - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @bytes: pointer to the data buffer - * @perrno: pointer to return error code - * - * module, offset, data_size and data are in cmd structure + * i40e_nvmupd_check_wait_event - handle NVM update operation events + * @hw: pointer to the hardware structure + * @opcode: the event that just happened + * @desc: AdminQ descriptor **/ -static int i40e_nvmupd_nvm_write(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) +void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode, + struct i40e_aq_desc *desc) { - struct i40e_asq_cmd_details cmd_details; - u8 module, transaction; - u8 preservation_flags; - int status = 0; - bool last; - - transaction = i40e_nvmupd_get_transaction(cmd->config); - module = i40e_nvmupd_get_module(cmd->config); - last = (transaction & I40E_NVM_LCB); - preservation_flags = i40e_nvmupd_get_preservation_flags(cmd->config); - - memset(&cmd_details, 0, sizeof(cmd_details)); - cmd_details.wb_desc = &hw->nvm_wb_desc; + u32 aq_desc_len = sizeof(struct i40e_aq_desc); - status = i40e_aq_update_nvm(hw, module, cmd->offset, - (u16)cmd->data_size, bytes, last, - preservation_flags, &cmd_details); - if (status) { - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_write mod 0x%x off 0x%x len 0x%x\n", - module, cmd->offset, cmd->data_size); - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_write status %d aq %d\n", - status, hw->aq.asq_last_status); - *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + if (opcode == hw->nvm_wait_opcode) { + memcpy(&hw->nvm_aq_event_desc, desc, aq_desc_len); + i40e_nvmupd_clear_wait_state(hw); } - - return status; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 14ab642cafdb2..432afbb642013 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -333,8 +333,11 @@ #define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3 #define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) #define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5 +#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT) #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24 #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT) +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25 +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT) #define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */ #define I40E_PFINT_ICR0_INTEVENT_SHIFT 0 #define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 0d7177083708f..64d198ed166b3 100644 --- 
a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2144,9 +2144,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, */ /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - I40E_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, I40E_RX_HDR_SIZE); if (unlikely(!skb)) return NULL; @@ -2630,7 +2628,22 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, return failure ? budget : (int)total_rx_packets; } -static inline u32 i40e_buildreg_itr(const int type, u16 itr) +/** + * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register + * @itr_idx: interrupt throttling index + * @interval: interrupt throttling interval value in usecs + * @force_swint: force software interrupt + * + * The function builds a value for the I40E_PFINT_DYN_CTLN register that + * is used to update the interrupt throttling interval for the specified + * ITR index and optionally enforces a software interrupt. If @itr_idx is + * equal to I40E_ITR_NONE, no interval change is applied and only the + * @force_swint parameter is taken into account. If neither an interval + * change nor a software interrupt is requested, the built value just + * enables the appropriate vector interrupt. + **/ +static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval, + bool force_swint) { u32 val; @@ -2644,23 +2657,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) * an event in the PBA anyway so we need to rely on the automask * to hold pending events for us until the interrupt is re-enabled * - * The itr value is reported in microseconds, and the register - * value is recorded in 2 microsecond units. For this reason we - * only need to shift by the interval shift - 1 instead of the - * full value. + * We have to shift the given value as it is reported in microseconds + * and the register value is recorded in 2 microsecond units. */ - itr &= I40E_ITR_MASK; + interval >>= 1; + /* 1. Enable vector interrupt + * 2. Update the interval for the specified ITR index + * (I40E_ITR_NONE in the register is used to indicate that + * no interval update is requested) + */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | - (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); + FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) | + FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval); + + /* 3. Enforce software interrupt trigger if requested + * (The rate of these software interrupts is limited by ITR2, + * which is set to 20K interrupts per second) + */ + if (force_swint) + val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | + I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK | + FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK, + I40E_SW_ITR); return val; } -/* a small macro to shorten up some long lines */ -#define INTREG I40E_PFINT_DYN_CTLN - /* The act of updating the ITR will cause it to immediately trigger. In order * to prevent this from throwing off adaptive update statistics we defer the * update so that it can only happen so often. 
So after either Tx or Rx are @@ -2679,8 +2702,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { + enum i40e_dyn_idx itr_idx = I40E_ITR_NONE; struct i40e_hw *hw = &vsi->back->hw; - u32 intval; + u16 interval = 0; + u32 itr_val; /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) { @@ -2702,8 +2727,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ if (q_vector->rx.target_itr < q_vector->rx.current_itr) { /* Rx ITR needs to be reduced, this is highest priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || @@ -2712,25 +2737,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, /* Tx ITR needs to be reduced, this is second priority * Tx ITR needs to be increased more than Rx, fourth priority */ - intval = i40e_buildreg_itr(I40E_TX_ITR, - q_vector->tx.target_itr); + itr_idx = I40E_TX_ITR; + interval = q_vector->tx.target_itr; q_vector->tx.current_itr = q_vector->tx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { /* Rx ITR needs to be increased, third priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else { /* No ITR update, lowest priority */ - intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown) q_vector->itr_countdown--; } - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), intval); + /* Do not update interrupt control register if VSI is down */ + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + return; + + /* Update ITR interval if necessary and enforce software interrupt + * if we are exiting busy poll. 
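+ * Enforcing the software interrupt here makes sure that work which became + * pending while interrupts were masked during busy polling is serviced + * promptly instead of being left stalled until the next interrupt.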
+ */ + if (q_vector->in_busy_poll) { + itr_val = i40e_buildreg_itr(itr_idx, interval, true); + q_vector->in_busy_poll = false; + } else { + itr_val = i40e_buildreg_itr(itr_idx, interval, false); + } + wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val); } /** @@ -2845,6 +2881,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) */ if (likely(napi_complete_done(napi, work_done))) i40e_update_enable_itr(vsi, q_vector); + else + q_vector->in_busy_poll = true; return min(work_done, budget - 1); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index abf15067eb5de..2cdc7de6301c1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -68,6 +68,7 @@ enum i40e_dyn_idx { /* these are indexes into ITRN registers */ #define I40E_RX_ITR I40E_IDX_ITR0 #define I40E_TX_ITR I40E_IDX_ITR1 +#define I40E_SW_ITR I40E_IDX_ITR2 /* Supported RSS offloads */ #define I40E_DEFAULT_RSS_HENA ( \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 83a34e98bdc79..232b65b9c8eac 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1624,8 +1624,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) { struct i40e_hw *hw = &pf->hw; struct i40e_vf *vf; - int i, v; u32 reg; + int i; /* If we don't have any VFs, then there is nothing to reset */ if (!pf->num_alloc_vfs) @@ -1636,11 +1636,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) return false; /* Begin reset on all VFs at once */ - for (v = 0; v < pf->num_alloc_vfs; v++) { - vf = &pf->vf[v]; + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is being reset no need to trigger reset again */ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - i40e_trigger_vf_reset(&pf->vf[v], flr); + i40e_trigger_vf_reset(vf, flr); } /* HW requires some time to make sure it can flush the FIFO for a VF @@ -1649,14 +1648,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) * the VFs using a simple iterator that increments once that VF has * finished resetting. */ - for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { + for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) { usleep_range(10000, 20000); /* Check each VF in sequence, beginning with the VF to fail * the previous check. */ - while (v < pf->num_alloc_vfs) { - vf = &pf->vf[v]; + while (vf < &pf->vf[pf->num_alloc_vfs]) { if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) { reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) @@ -1666,7 +1664,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* If the current VF has finished resetting, move on * to the next VF in sequence. */ - v++; + ++vf; } } @@ -1676,39 +1674,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* Display a warning if at least one VF didn't manage to reset in * time, but continue on with the operation. */ - if (v < pf->num_alloc_vfs) + if (vf < &pf->vf[pf->num_alloc_vfs]) dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n", - pf->vf[v].vf_id); + vf->vf_id); usleep_range(10000, 20000); /* Begin disabling all the rings associated with VFs, but do not wait * between each VF. 
*/ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]); } /* Now that we've notified HW to disable all of the VF rings, wait * until they finish. */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]); } /* Hw may need up to 50ms to finish disabling the RX queues. We @@ -1717,12 +1715,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) mdelay(50); /* Finish the reset on each VF */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_cleanup_reset_vf(&pf->vf[v]); + i40e_cleanup_reset_vf(vf); } i40e_flush(hw); @@ -3139,11 +3137,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) /* Allow to delete VF primary MAC only if it was not set * administratively by PF or if VF is trusted. */ - if (ether_addr_equal(addr, vf->default_lan_addr.addr) && - i40e_can_vf_change_mac(vf)) - was_unimac_deleted = true; - else - continue; + if (ether_addr_equal(addr, vf->default_lan_addr.addr)) { + if (i40e_can_vf_change_mac(vf)) + was_unimac_deleted = true; + else + continue; + } if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = -EINVAL; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 11500003af0d4..a85b425794df2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -301,8 +301,7 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, net_prefetch(xdp->data_meta); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize); if (unlikely(!skb)) goto out; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index ef2440f3abf8b..13361a780ece5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -5023,7 +5023,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * * Called when the system (VM) is entering sleep/suspend. **/ -static int __maybe_unused iavf_suspend(struct device *dev_d) +static int iavf_suspend(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct iavf_adapter *adapter = netdev_priv(netdev); @@ -5051,7 +5051,7 @@ static int __maybe_unused iavf_suspend(struct device *dev_d) * * Called when the system (VM) is resumed from sleep/suspend. 
**/ -static int __maybe_unused iavf_resume(struct device *dev_d) +static int iavf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); struct iavf_adapter *adapter; @@ -5238,14 +5238,14 @@ static void iavf_shutdown(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D3hot); } -static SIMPLE_DEV_PM_OPS(iavf_pm_ops, iavf_suspend, iavf_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(iavf_pm_ops, iavf_suspend, iavf_resume); static struct pci_driver iavf_driver = { .name = iavf_driver_name, .id_table = iavf_pci_tbl, .probe = iavf_probe, .remove = iavf_remove, - .driver.pm = &iavf_pm_ops, + .driver.pm = pm_sleep_ptr(&iavf_pm_ops), .shutdown = iavf_shutdown, }; diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index b71484c87a846..32bb604a13825 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1334,9 +1334,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, net_prefetch(va); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - IAVF_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, IAVF_RX_HDR_SIZE); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 365c03d1c4622..ca145ce2b1eb4 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -522,17 +522,10 @@ enum ice_misc_thread_tasks { }; struct ice_eswitch { - struct ice_vsi *control_vsi; struct ice_vsi *uplink_vsi; struct ice_esw_br_offloads *br_offloads; struct xarray reprs; bool is_running; - /* struct to allow cp queues management optimization */ - struct { - int to_reach; - int value; - bool is_reaching; - } qs; }; struct ice_agg_node { @@ -662,6 +655,8 @@ struct ice_pf { struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES]; struct ice_dplls dplls; struct device *hwmon_dev; + + int split_cnt; }; extern struct workqueue_struct *ice_lag_wq; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 1f3e7a6903e56..28d9c0792a938 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1495,6 +1495,10 @@ struct ice_aqc_link_topo_addr { struct ice_aqc_get_link_topo { struct ice_aqc_link_topo_addr addr; u8 node_part_num; +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_SFP_PLUS 0x11 +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_SFP28 0x12 +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_QSFP_PLUS 0x13 +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_QSFP28 0x14 #define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575 0x21 #define ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032 0x24 #define ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384 0x25 @@ -1662,6 +1666,23 @@ struct ice_aqc_nvm { __le32 addr_low; }; +#define ICE_AQC_NVM_CMPO_MOD_ID 0x153 +#define ICE_AQC_NVM_EMP_SETTINGS_MOD_ID 0x0F +#define ICE_AQC_NVM_MAX_PWR_LIMIT_OFFSET 0x1A +#define ICE_AQC_NVM_DFLT_MAX_PWR_MASK GENMASK(7, 0) +#define ICE_AQC_NVM_BOARD_MAX_PWR_MASK GENMASK(15, 8) + +#define ICE_NUM_OF_CAGES 8 + +#define ICE_AQC_NVM_CMPO_ENABLE BIT(8) +#define ICE_AQC_NVM_CMPO_POWER_MASK GENMASK(7, 0) + +/* Cage Max Power override NVM module */ +struct ice_aqc_nvm_cmpo { + __le16 length; + __le16 cages_cfg[ICE_NUM_OF_CAGES]; +}; + #define ICE_AQC_NVM_START_POINT 0 /* NVM Checksum Command (direct, 0x0706) */ diff --git a/drivers/net/ethernet/intel/ice/ice_base.c 
b/drivers/net/ethernet/intel/ice/ice_base.c index a545a7917e4fc..af0384b371192 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -263,30 +263,6 @@ static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset; } -/** - * ice_eswitch_calc_txq_handle - * @ring: pointer to ring which unique index is needed - * - * To correctly work with many netdevs ring->q_index of Tx rings on switchdev - * VSI can repeat. Hardware ring setup requires unique q_index. Calculate it - * here by finding index in vsi->tx_rings of this ring. - * - * Return ICE_INVAL_Q_INDEX when index wasn't found. Should never happen, - * because VSI is get from ring->vsi, so it has to be present in this VSI. - */ -static u16 ice_eswitch_calc_txq_handle(struct ice_tx_ring *ring) -{ - const struct ice_vsi *vsi = ring->vsi; - int i; - - ice_for_each_txq(vsi, i) { - if (vsi->tx_rings[i] == ring) - return i; - } - - return ICE_INVAL_Q_INDEX; -} - /** * ice_cfg_xps_tx_ring - Configure XPS for a Tx ring * @ring: The Tx ring to configure @@ -353,9 +329,6 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf->vf_id; tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; break; - case ICE_VSI_SWITCHDEV_CTRL: - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; - break; default: return; } @@ -479,6 +452,14 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Rx queue threshold in units of 64 */ rlan_ctx.lrxqthresh = 1; + /* PF acts as uplink for switchdev; set flex descriptor with src_vsi + * metadata and flags to allow redirecting to PR netdev + */ + if (ice_is_eswitch_mode_switchdev(vsi->back)) { + ring->flags |= ICE_RX_FLAGS_MULTIDEV; + rxdid = ICE_RXDID_FLEX_NIC_2; + } + /* Enable Flexible Descriptors in the queue context which * allows this driver to select a specific receive descriptor format * increasing context priority to pick up profile ID; default is 0x01; @@ -919,14 +900,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, /* Add unique software queue handle of the Tx queue per * TC into the VSI Tx ring */ - if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { - ring->q_handle = ice_eswitch_calc_txq_handle(ring); - - if (ring->q_handle == ICE_INVAL_Q_INDEX) - return -ENODEV; - } else { - ring->q_handle = ice_calc_txq_handle(vsi, ring, tc); - } + ring->q_handle = ice_calc_txq_handle(vsi, ring, tc); if (ch) status = ice_ena_vsi_txq(vsi->port_info, ch->ch_vsi->idx, 0, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index db4b2844e1f71..e731ae8afcb6c 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -571,6 +571,23 @@ static bool ice_is_media_cage_present(struct ice_port_info *pi) NULL); } +int ice_get_port_cage_node(struct ice_hw *hw, u8 index, + u16 *node_handle, u8 *node_part_number) +{ + struct ice_aqc_get_link_topo cmd = {}; + + cmd.addr.topo_params.node_type_ctx = + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_TYPE_M, + ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE); + cmd.addr.topo_params.node_type_ctx |= + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, + ICE_AQC_LINK_TOPO_NODE_CTX_GLOBAL); + cmd.addr.topo_params.index = index; + + return ice_aq_get_netlist_node(hw, &cmd, node_part_number, + node_handle); +} + /** * ice_get_media_type - Gets media type * @pi: port information structure @@ -5721,6 +5738,29 @@ static bool 
ice_is_fw_api_min_ver(struct ice_hw *hw, u8 maj, u8 min, u8 patch) return false; } +/** + * ice_is_fw_min_ver + * @hw: pointer to the hardware structure + * @maj: major version + * @min: minor version + * @patch: patch version + * + * Checks if the firmware is at least the given minimum version + */ +static bool ice_is_fw_min_ver(struct ice_hw *hw, u8 maj, u8 min, u8 patch) +{ + if (hw->fw_maj_ver > maj) + return true; + if (hw->fw_maj_ver == maj) { + if (hw->fw_min_ver > min) + return true; + if (hw->fw_min_ver == min && hw->fw_patch >= patch) + return true; + } + + return false; +} + /** * ice_fw_supports_link_override * @hw: pointer to the hardware structure @@ -5734,6 +5774,12 @@ bool ice_fw_supports_link_override(struct ice_hw *hw) ICE_FW_API_LINK_OVERRIDE_PATCH); } +bool ice_fw_supports_cmpo(struct ice_hw *hw) +{ + return ice_is_fw_min_ver(hw, ICE_FW_CMPO_MAJ, ICE_FW_CMPO_MIN, + ICE_FW_CMPO_PATCH); +} + /** * ice_get_link_default_override * @ldo: pointer to the link default override struct diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index ffb22c7ce28b7..c476fbf60400c 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -96,6 +96,8 @@ bool ice_is_phy_rclk_in_netlist(struct ice_hw *hw); bool ice_is_clock_mux_in_netlist(struct ice_hw *hw); bool ice_is_cgu_in_netlist(struct ice_hw *hw); bool ice_is_gps_in_netlist(struct ice_hw *hw); +int ice_get_port_cage_node(struct ice_hw *hw, u8 index, + u16 *node_handle, u8 *node_part_number); int ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd, u8 *node_part_number, u16 *node_handle); @@ -117,6 +119,7 @@ int ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd); bool ice_fw_supports_link_override(struct ice_hw *hw); +bool ice_fw_supports_cmpo(struct ice_hw *hw); int ice_get_link_default_override(struct ice_link_default_override_tlv *ldo, struct ice_port_info *pi); diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 6e20ee6100224..ceb17c004d794 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -291,7 +291,6 @@ static void ice_dcb_ena_dis_vsi(struct ice_pf *pf, bool ena, bool locked) switch (vsi->type) { case ICE_VSI_CHNL: - case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_PF: if (ena) ice_ena_vsi(vsi, locked); @@ -776,8 +775,7 @@ void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked) /* no need to proceed with remaining cfg if it is CHNL * or switchdev VSI */ - if (vsi->type == ICE_VSI_CHNL || - vsi->type == ICE_VSI_SWITCHDEV_CTRL) + if (vsi->type == ICE_VSI_CHNL) continue; ice_vsi_map_rings_to_vectors(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index b516e42b41f0f..4797d9b756f91 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -1604,6 +1604,14 @@ ice_devlink_set_port_split_options(struct ice_pf *pf, return; } + pf->split_cnt = options[active_idx].pmd; + + /* As FW supports port split options only for the whole device, + * set port split options only for the first PF. 
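+ * All PFs, however, record the currently active split count in + * pf->split_cnt above; it is used later to determine cage ownership + * in ice_get_cage_idx().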
+ */ + if (pf->hw.pf_id != 0) + return; + /* find the biggest available port split count */ for (i = 0; i < option_count; i++) attrs->lanes = max_t(int, attrs->lanes, options[i].pmd); @@ -1648,11 +1656,7 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = pf->hw.bus.func; - /* As FW supports only port split options for whole device, - * set port split options only for first PF. - */ - if (pf->hw.pf_id == 0) - ice_devlink_set_port_split_options(pf, &attrs); + ice_devlink_set_port_split_options(pf, &attrs); ice_devlink_set_switch_id(pf, &attrs.switch_id); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 9069725c71b4a..af4e9530eb48a 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -10,85 +10,6 @@ #include "ice_devlink.h" #include "ice_tc_lib.h" -/** - * ice_eswitch_del_sp_rules - delete adv rules added on PRs - * @pf: pointer to the PF struct - * - * Delete all advanced rules that were used to forward packets with the - * device's VSI index to the corresponding eswitch ctrl VSI queue. - */ -static void ice_eswitch_del_sp_rules(struct ice_pf *pf) -{ - struct ice_repr *repr; - unsigned long id; - - xa_for_each(&pf->eswitch.reprs, id, repr) { - if (repr->sp_rule.rid) - ice_rem_adv_rule_by_id(&pf->hw, &repr->sp_rule); - } -} - -/** - * ice_eswitch_add_sp_rule - add adv rule with device's VSI index - * @pf: pointer to PF struct - * @repr: pointer to the repr struct - * - * This function adds advanced rule that forwards packets with - * device's VSI index to the corresponding eswitch ctrl VSI queue. - */ -static int ice_eswitch_add_sp_rule(struct ice_pf *pf, struct ice_repr *repr) -{ - struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; - struct ice_adv_rule_info rule_info = { 0 }; - struct ice_adv_lkup_elem *list; - struct ice_hw *hw = &pf->hw; - const u16 lkups_cnt = 1; - int err; - - list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); - if (!list) - return -ENOMEM; - - ice_rule_add_src_vsi_metadata(list); - - rule_info.sw_act.flag = ICE_FLTR_TX; - rule_info.sw_act.vsi_handle = ctrl_vsi->idx; - rule_info.sw_act.fltr_act = ICE_FWD_TO_Q; - rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id + - ctrl_vsi->rxq_map[repr->q_id]; - rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE; - rule_info.flags_info.act_valid = true; - rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN; - rule_info.src_vsi = repr->src_vsi->idx; - - err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, - &repr->sp_rule); - if (err) - dev_err(ice_pf_to_dev(pf), "Unable to add slow-path rule for eswitch for PR %d", - repr->id); - - kfree(list); - return err; -} - -static int -ice_eswitch_add_sp_rules(struct ice_pf *pf) -{ - struct ice_repr *repr; - unsigned long id; - int err; - - xa_for_each(&pf->eswitch.reprs, id, repr) { - err = ice_eswitch_add_sp_rule(pf, repr); - if (err) { - ice_eswitch_del_sp_rules(pf); - return err; - } - } - - return 0; -} - /** * ice_eswitch_setup_env - configure eswitch HW filters * @pf: pointer to PF struct @@ -99,10 +20,13 @@ ice_eswitch_add_sp_rules(struct ice_pf *pf) static int ice_eswitch_setup_env(struct ice_pf *pf) { struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; - struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; struct net_device *netdev = uplink_vsi->netdev; + bool if_running = netif_running(netdev); struct ice_vsi_vlan_ops *vlan_ops; - bool rule_added = false; + + if 
(if_running && !test_and_set_bit(ICE_VSI_DOWN, uplink_vsi->state)) + if (ice_down(uplink_vsi)) + return -ENODEV; ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx); @@ -112,98 +36,53 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) netif_addr_unlock_bh(netdev); if (ice_vsi_add_vlan_zero(uplink_vsi)) + goto err_vlan_zero; + + if (ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, true, + ICE_FLTR_RX)) goto err_def_rx; - if (!ice_is_dflt_vsi_in_use(uplink_vsi->port_info)) { - if (ice_set_dflt_vsi(uplink_vsi)) - goto err_def_rx; - rule_added = true; - } + if (ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, true, + ICE_FLTR_TX)) + goto err_def_tx; vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); if (vlan_ops->dis_rx_filtering(uplink_vsi)) - goto err_dis_rx; + goto err_vlan_filtering; if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override)) goto err_override_uplink; - if (ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_set_allow_override)) - goto err_override_control; - if (ice_vsi_update_local_lb(uplink_vsi, true)) goto err_override_local_lb; + if (if_running && ice_up(uplink_vsi)) + goto err_up; + return 0; +err_up: + ice_vsi_update_local_lb(uplink_vsi, false); err_override_local_lb: - ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override); -err_override_control: ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); err_override_uplink: vlan_ops->ena_rx_filtering(uplink_vsi); -err_dis_rx: - if (rule_added) - ice_clear_dflt_vsi(uplink_vsi); +err_vlan_filtering: + ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, + ICE_FLTR_TX); +err_def_tx: + ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, + ICE_FLTR_RX); err_def_rx: + ice_vsi_del_vlan_zero(uplink_vsi); +err_vlan_zero: ice_fltr_add_mac_and_broadcast(uplink_vsi, uplink_vsi->port_info->mac.perm_addr, ICE_FWD_TO_VSI); - return -ENODEV; -} - -/** - * ice_eswitch_remap_rings_to_vectors - reconfigure rings of eswitch ctrl VSI - * @eswitch: pointer to eswitch struct - * - * In eswitch number of allocated Tx/Rx rings is equal. - * - * This function fills q_vectors structures associated with representor and - * move each ring pairs to port representor netdevs. Each port representor - * will have dedicated 1 Tx/Rx ring pair, so number of rings pair is equal to - * number of VFs. - */ -static void ice_eswitch_remap_rings_to_vectors(struct ice_eswitch *eswitch) -{ - struct ice_vsi *vsi = eswitch->control_vsi; - unsigned long repr_id = 0; - int q_id; - - ice_for_each_txq(vsi, q_id) { - struct ice_q_vector *q_vector; - struct ice_tx_ring *tx_ring; - struct ice_rx_ring *rx_ring; - struct ice_repr *repr; - - repr = xa_find(&eswitch->reprs, &repr_id, U32_MAX, - XA_PRESENT); - if (!repr) - break; - - repr_id += 1; - repr->q_id = q_id; - q_vector = repr->q_vector; - tx_ring = vsi->tx_rings[q_id]; - rx_ring = vsi->rx_rings[q_id]; - - q_vector->vsi = vsi; - q_vector->reg_idx = vsi->q_vectors[0]->reg_idx; - - q_vector->num_ring_tx = 1; - q_vector->tx.tx_ring = tx_ring; - tx_ring->q_vector = q_vector; - tx_ring->next = NULL; - tx_ring->netdev = repr->netdev; - /* In switchdev mode, from OS stack perspective, there is only - * one queue for given netdev, so it needs to be indexed as 0. 
- */ - tx_ring->q_index = 0; + if (if_running) + ice_up(uplink_vsi); - q_vector->num_ring_rx = 1; - q_vector->rx.rx_ring = rx_ring; - rx_ring->q_vector = q_vector; - rx_ring->next = NULL; - rx_ring->netdev = repr->netdev; - } + return -ENODEV; } /** @@ -225,8 +104,6 @@ ice_eswitch_release_repr(struct ice_pf *pf, struct ice_repr *repr) repr->dst = NULL; ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, ICE_FWD_TO_VSI); - - netif_napi_del(&repr->q_vector->napi); } /** @@ -236,7 +113,7 @@ ice_eswitch_release_repr(struct ice_pf *pf, struct ice_repr *repr) */ static int ice_eswitch_setup_repr(struct ice_pf *pf, struct ice_repr *repr) { - struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; + struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; struct ice_vsi *vsi = repr->src_vsi; struct metadata_dst *dst; @@ -252,15 +129,11 @@ static int ice_eswitch_setup_repr(struct ice_pf *pf, struct ice_repr *repr) if (ice_vsi_add_vlan_zero(vsi)) goto err_update_security; - netif_napi_add(repr->netdev, &repr->q_vector->napi, - ice_napi_poll); - - netif_keep_dst(repr->netdev); + netif_keep_dst(uplink_vsi->netdev); dst = repr->dst; dst->u.port_info.port_id = vsi->vsi_num; - dst->u.port_info.lower_dev = repr->netdev; - ice_repr_set_traffic_vsi(repr, ctrl_vsi); + dst->u.port_info.lower_dev = uplink_vsi->netdev; return 0; @@ -318,27 +191,19 @@ void ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi) netdev_tx_t ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct ice_netdev_priv *np; - struct ice_repr *repr; - struct ice_vsi *vsi; - - np = netdev_priv(netdev); - vsi = np->vsi; - - if (!vsi || !ice_is_switchdev_running(vsi->back)) - return NETDEV_TX_BUSY; - - if (ice_is_reset_in_progress(vsi->back->state) || - test_bit(ICE_VF_DIS, vsi->back->state)) - return NETDEV_TX_BUSY; + struct ice_repr *repr = ice_netdev_to_repr(netdev); + unsigned int len = skb->len; + int ret; - repr = ice_netdev_to_repr(netdev); skb_dst_drop(skb); dst_hold((struct dst_entry *)repr->dst); skb_dst_set(skb, (struct dst_entry *)repr->dst); - skb->queue_mapping = repr->q_id; + skb->dev = repr->dst->u.port_info.lower_dev; + + ret = dev_queue_xmit(skb); + ice_repr_inc_tx_stats(repr, len, ret); - return ice_start_xmit(skb, netdev); + return ret; } /** @@ -374,13 +239,11 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb, static void ice_eswitch_release_env(struct ice_pf *pf) { struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; - struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; struct ice_vsi_vlan_ops *vlan_ops; vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); ice_vsi_update_local_lb(uplink_vsi, false); - ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override); ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); vlan_ops->ena_rx_filtering(uplink_vsi); ice_clear_dflt_vsi(uplink_vsi); @@ -389,56 +252,13 @@ static void ice_eswitch_release_env(struct ice_pf *pf) ICE_FWD_TO_VSI); } -/** - * ice_eswitch_vsi_setup - configure eswitch control VSI - * @pf: pointer to PF structure - * @pi: pointer to port_info structure - */ -static struct ice_vsi * -ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) -{ - struct ice_vsi_cfg_params params = {}; - - params.type = ICE_VSI_SWITCHDEV_CTRL; - params.pi = pi; - params.flags = ICE_VSI_FLAG_INIT; - - return ice_vsi_setup(pf, ¶ms); -} - -/** - * ice_eswitch_napi_enable - enable NAPI for all port representors - * @reprs: xarray of reprs - */ -static void ice_eswitch_napi_enable(struct xarray *reprs) -{ - struct 
ice_repr *repr; - unsigned long id; - - xa_for_each(reprs, id, repr) - napi_enable(&repr->q_vector->napi); -} - -/** - * ice_eswitch_napi_disable - disable NAPI for all port representors - * @reprs: xarray of reprs - */ -static void ice_eswitch_napi_disable(struct xarray *reprs) -{ - struct ice_repr *repr; - unsigned long id; - - xa_for_each(reprs, id, repr) - napi_disable(&repr->q_vector->napi); -} - /** * ice_eswitch_enable_switchdev - configure eswitch in switchdev mode * @pf: pointer to PF structure */ static int ice_eswitch_enable_switchdev(struct ice_pf *pf) { - struct ice_vsi *ctrl_vsi, *uplink_vsi; + struct ice_vsi *uplink_vsi; uplink_vsi = ice_get_main_vsi(pf); if (!uplink_vsi) @@ -450,17 +270,10 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf) return -EINVAL; } - pf->eswitch.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info); - if (!pf->eswitch.control_vsi) - return -ENODEV; - - ctrl_vsi = pf->eswitch.control_vsi; - /* cp VSI is createad with 1 queue as default */ - pf->eswitch.qs.value = 1; pf->eswitch.uplink_vsi = uplink_vsi; if (ice_eswitch_setup_env(pf)) - goto err_vsi; + return -ENODEV; if (ice_eswitch_br_offloads_init(pf)) goto err_br_offloads; @@ -471,8 +284,6 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf) err_br_offloads: ice_eswitch_release_env(pf); -err_vsi: - ice_vsi_release(ctrl_vsi); return -ENODEV; } @@ -482,14 +293,10 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf) */ static void ice_eswitch_disable_switchdev(struct ice_pf *pf) { - struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; - ice_eswitch_br_offloads_deinit(pf); ice_eswitch_release_env(pf); - ice_vsi_release(ctrl_vsi); pf->eswitch.is_running = false; - pf->eswitch.qs.is_reaching = false; } /** @@ -530,7 +337,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev", pf->hw.pf_id); - xa_init_flags(&pf->eswitch.reprs, XA_FLAGS_ALLOC); + xa_init(&pf->eswitch.reprs); NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev"); break; } @@ -602,56 +409,18 @@ void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) static void ice_eswitch_stop_reprs(struct ice_pf *pf) { - ice_eswitch_del_sp_rules(pf); ice_eswitch_stop_all_tx_queues(pf); - ice_eswitch_napi_disable(&pf->eswitch.reprs); } static void ice_eswitch_start_reprs(struct ice_pf *pf) { - ice_eswitch_napi_enable(&pf->eswitch.reprs); ice_eswitch_start_all_tx_queues(pf); - ice_eswitch_add_sp_rules(pf); -} - -static void -ice_eswitch_cp_change_queues(struct ice_eswitch *eswitch, int change) -{ - struct ice_vsi *cp = eswitch->control_vsi; - int queues = 0; - - if (eswitch->qs.is_reaching) { - if (eswitch->qs.to_reach >= eswitch->qs.value + change) { - queues = eswitch->qs.to_reach; - eswitch->qs.is_reaching = false; - } else { - queues = 0; - } - } else if ((change > 0 && cp->alloc_txq <= eswitch->qs.value) || - change < 0) { - queues = cp->alloc_txq + change; - } - - if (queues) { - cp->req_txq = queues; - cp->req_rxq = queues; - ice_vsi_close(cp); - ice_vsi_rebuild(cp, ICE_VSI_FLAG_NO_INIT); - ice_vsi_open(cp); - } else if (!change) { - /* change == 0 means that VSI wasn't open, open it here */ - ice_vsi_open(cp); - } - - eswitch->qs.value += change; - ice_eswitch_remap_rings_to_vectors(eswitch); } int ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) { struct ice_repr *repr; - int change = 1; int err; if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) @@ -661,9 +430,6 @@ ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) 
err = ice_eswitch_enable_switchdev(pf); if (err) return err; - /* Control plane VSI is created with 1 queue as default */ - pf->eswitch.qs.to_reach -= 1; - change = 0; } ice_eswitch_stop_reprs(pf); @@ -678,14 +444,12 @@ ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) if (err) goto err_setup_repr; - err = xa_alloc(&pf->eswitch.reprs, &repr->id, repr, - XA_LIMIT(1, INT_MAX), GFP_KERNEL); + err = xa_insert(&pf->eswitch.reprs, repr->id, repr, GFP_KERNEL); if (err) goto err_xa_alloc; vf->repr_id = repr->id; - ice_eswitch_cp_change_queues(&pf->eswitch, change); ice_eswitch_start_reprs(pf); return 0; @@ -715,8 +479,6 @@ void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) if (xa_empty(&pf->eswitch.reprs)) ice_eswitch_disable_switchdev(pf); - else - ice_eswitch_cp_change_queues(&pf->eswitch, -1); ice_eswitch_release_repr(pf, repr); ice_repr_rem_vf(repr); @@ -738,37 +500,37 @@ void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) * ice_eswitch_rebuild - rebuild eswitch * @pf: pointer to PF structure */ -int ice_eswitch_rebuild(struct ice_pf *pf) +void ice_eswitch_rebuild(struct ice_pf *pf) { struct ice_repr *repr; unsigned long id; - int err; if (!ice_is_switchdev_running(pf)) - return 0; - - err = ice_vsi_rebuild(pf->eswitch.control_vsi, ICE_VSI_FLAG_INIT); - if (err) - return err; + return; xa_for_each(&pf->eswitch.reprs, id, repr) ice_eswitch_detach(pf, repr->vf); - - return 0; } /** - * ice_eswitch_reserve_cp_queues - reserve control plane VSI queues - * @pf: pointer to PF structure - * @change: how many more (or less) queues is needed + * ice_eswitch_get_target - get netdev based on src_vsi from descriptor + * @rx_ring: ring used to receive the packet + * @rx_desc: descriptor used to get src_vsi value * - * Remember to call ice_eswitch_attach/detach() the "change" times. + * Get src_vsi value from descriptor and load correct representor. If it isn't + * found return rx_ring->netdev. 
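+ * + * Rings flagged with ICE_RX_FLAGS_MULTIDEV use the flex NIC 2 + * descriptor format, which carries the src_vsi metadata needed here.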
*/ -void ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change) +struct net_device *ice_eswitch_get_target(struct ice_rx_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc) { - if (pf->eswitch.qs.value + change < 0) - return; + struct ice_eswitch *eswitch = &rx_ring->vsi->back->eswitch; + struct ice_32b_rx_flex_desc_nic_2 *desc; + struct ice_repr *repr; + + desc = (struct ice_32b_rx_flex_desc_nic_2 *)rx_desc; + repr = xa_load(&eswitch->reprs, le16_to_cpu(desc->src_vsi)); + if (!repr) + return rx_ring->netdev; - pf->eswitch.qs.to_reach = pf->eswitch.qs.value + change; - pf->eswitch.qs.is_reaching = true; + return repr->netdev; } diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index 1a288a03a79a1..e2e5c0c75e7d4 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -10,7 +10,7 @@ void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf); int ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf); -int ice_eswitch_rebuild(struct ice_pf *pf); +void ice_eswitch_rebuild(struct ice_pf *pf); int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode); int @@ -26,7 +26,8 @@ void ice_eswitch_set_target_vsi(struct sk_buff *skb, struct ice_tx_offload_params *off); netdev_tx_t ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev); -void ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change); +struct net_device *ice_eswitch_get_target(struct ice_rx_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc); #else /* CONFIG_ICE_SWITCHDEV */ static inline void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) { } @@ -78,7 +79,11 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } -static inline void -ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change) { } +static inline struct net_device * +ice_eswitch_get_target(struct ice_rx_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc) +{ + return rx_ring->netdev; +} #endif /* CONFIG_ICE_SWITCHDEV */ #endif /* _ICE_ESWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 255a9c8151b45..b38a984b44a25 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4297,6 +4297,465 @@ ice_get_module_eeprom(struct net_device *netdev, return 0; } +/** + * ice_get_min_pwr_allowed - Get min power allowed + * @hw: pointer to the hardware structure + * @extack: extended ACK from the Netlink message + * + * Values are constant based on the cage type. + * Return value in mW. 
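+ * For example, SFP+/SFP28 cages report a 1000 mW minimum while + * QSFP+/QSFP28 cages report 1500 mW.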
+ */ +static int ice_get_min_pwr_allowed(struct ice_hw *hw, + struct netlink_ext_ack *extack) +{ + u8 node_part_number; + u16 node_handle; + int err; + + err = ice_get_port_cage_node(hw, 0, &node_handle, &node_part_number); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to get cage node handle"); + return err; + } + + switch (node_part_number) { + case ICE_AQC_GET_LINK_TOPO_NODE_NR_SFP_PLUS: + case ICE_AQC_GET_LINK_TOPO_NODE_NR_SFP28: + return 1000; + case ICE_AQC_GET_LINK_TOPO_NODE_NR_QSFP_PLUS: + case ICE_AQC_GET_LINK_TOPO_NODE_NR_QSFP28: + return 1500; + default: + return -EINVAL; + } +} + +/** + * ice_pwr_nvm_to_ethtool - Convert NVM values to ethtool + * @pwr: power from NVM + */ +static u32 ice_pwr_nvm_to_ethtool(u32 pwr) +{ + /* ethtool takes power values in mW */ + pwr *= 1000; + /* 0.5 W resolution */ + pwr /= 2; + + return pwr; +} + +/** + * ice_pwr_ethtool_to_nvm - Convert ethtool values to NVM + * @pwr: power from ethtool, in mW + */ +static u32 ice_pwr_ethtool_to_nvm(u32 pwr) +{ + /* 0.5 W resolution */ + pwr *= 2; + /* ethtool takes power values in mW */ + pwr /= 1000; + + return pwr; +} + +/** + * ice_get_num_of_cages - Get number of cages in the board + * @hw: pointer to the hardware structure + * + * We have as many cages as netlist nodes of cage type. + */ +static int ice_get_num_of_cages(struct ice_hw *hw) +{ + int i, err, cage_count = 0; + u8 node_part_number; + u16 node_handle; + + for (i = 0; i < ICE_NUM_OF_CAGES; i++) { + err = ice_get_port_cage_node(hw, i, &node_handle, + &node_part_number); + if (!err && node_handle) + cage_count++; + } + + return cage_count; +} + +/** + * ice_get_board_max_pwr - Get max power allowed per board + * @hw: pointer to the hardware structure + * + * Board maximum power is stored in EMP settings NVM module + */ +static int ice_get_board_max_pwr(struct ice_hw *hw) +{ + u16 board_max_pwr; + __le16 data; + int err; + + err = ice_acquire_nvm(hw, ICE_RES_READ); + if (err) + return err; + + err = ice_aq_read_nvm(hw, ICE_AQC_NVM_EMP_SETTINGS_MOD_ID, + ICE_AQC_NVM_MAX_PWR_LIMIT_OFFSET, + sizeof(data), &data, + true, false, NULL); + if (err) { + ice_release_nvm(hw); + return err; + } + + ice_release_nvm(hw); + + board_max_pwr = __le16_to_cpu(data); + board_max_pwr = FIELD_GET(ICE_AQC_NVM_BOARD_MAX_PWR_MASK, + board_max_pwr); + + return ice_pwr_nvm_to_ethtool(board_max_pwr); +} + +/** + * ice_get_max_pwr_allowed - Get max power allowed per cage + * @hw: pointer to the hardware structure + * @min_pwr_allowed: min allowed power + * @cage_count: number of cages in the board + */ +static int ice_get_max_pwr_allowed(struct ice_hw *hw, u32 min_pwr_allowed, + int cage_count) +{ + int board_max_pwr; + + board_max_pwr = ice_get_board_max_pwr(hw); + if (board_max_pwr < 0) + return board_max_pwr; + + return board_max_pwr - (cage_count - 1) * min_pwr_allowed; +} + +/** + * ice_get_cage_idx - Get index to the cage + * @pf: pointer to the PF structure + * @cage_count: number of cages in the board + * @extack: extended ACK from the Netlink message + * + * Get index to the cage and validate if the given PF + * is associated with the cage. 
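+ * + * For example, with two cages and a 4-way port split, PF 0 owns the + * first cage and PF 2 owns the second one (pf_id * 2 == split_cnt).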
+ */ +static int ice_get_cage_idx(struct ice_pf *pf, int cage_count, + struct netlink_ext_ack *extack) +{ + /* if there is only one cage, PF 0 is responsible for it */ + if (cage_count == 1) { + if (pf->hw.pf_id == 0) + return 0; + goto err; + } else if (cage_count == 4) { + /* if there are 4 cages, then port split is not supported, + * so each PF is responsible for its cage + */ + return pf->hw.pf_id; + } else if (cage_count == 2) { + /* We have 2 cages, PF 0 always takes care of the first one. + * If the split_cnt is 2, then PF 1 takes care of the second cage. + * If the split_cnt is 4, then PF 2 takes care of the second cage. + * If the split_cnt is 8, then PF 4 takes care of the second cage. + * So, the formula for the second cage is pf_id * 2 == split_cnt + */ + if (pf->hw.pf_id == 0 || pf->hw.pf_id * 2 == pf->split_cnt) + return pf->hw.pf_id; + } + +err: + NL_SET_ERR_MSG_MOD(extack, "Cage maximum power cannot be requested for selected port"); + + return -EPERM; +} + +/** + * ice_get_dflt_max_pwr - Get default max power + * @hw: pointer to the hardware structure + * + * The default max power is stored in the EMP settings NVM module + */ +static int ice_get_dflt_max_pwr(struct ice_hw *hw) +{ + __le16 data; + u16 pwr; + int ret; + + ret = ice_acquire_nvm(hw, ICE_RES_READ); + if (ret) + return ret; + + ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_EMP_SETTINGS_MOD_ID, + ICE_AQC_NVM_MAX_PWR_LIMIT_OFFSET, sizeof(data), + &data, true, false, NULL); + if (ret) { + ice_release_nvm(hw); + return ret; + } + + ice_release_nvm(hw); + + pwr = __le16_to_cpu(data); + pwr = FIELD_GET(ICE_AQC_NVM_DFLT_MAX_PWR_MASK, pwr); + + return ice_pwr_nvm_to_ethtool(pwr); +} + +/** + * ice_get_max_pwr_set - Get currently set max power + * @hw: pointer to the hardware structure + * @idx: index of the cage + * + * If the CMPO enable bit is set, use the value from the + * CMPO module; otherwise use the default value. + */ +static int ice_get_max_pwr_set(struct ice_hw *hw, int idx) +{ + struct ice_aqc_nvm_cmpo data; + int max_pwr_set; + u16 temp; + int ret; + + ret = ice_acquire_nvm(hw, ICE_RES_READ); + if (ret) + return ret; + + ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_CMPO_MOD_ID, 0, sizeof(data), + &data, true, false, NULL); + if (ret) { + ice_release_nvm(hw); + return ret; + } + + ice_release_nvm(hw); + + temp = le16_to_cpu(data.cages_cfg[idx]); + + if (FIELD_GET(ICE_AQC_NVM_CMPO_ENABLE, temp)) { + max_pwr_set = FIELD_GET(ICE_AQC_NVM_CMPO_POWER_MASK, temp); + return ice_pwr_nvm_to_ethtool(max_pwr_set); + } else { + return ice_get_dflt_max_pwr(hw); + } +} + +/** + * ice_get_module_power_cfg - Get device's power setting + * @dev: network device + * @params: output parameters + * @extack: extended ACK from the Netlink message + * + * We care only about min_pwr_allowed, max_pwr_allowed and max_pwr_set params. 
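+ * All values are reported in mW; the NVM encodes limits in 0.5 W units, + * so e.g. an NVM-encoded limit of 3 is reported as 1500 mW.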
+/**
+ * ice_get_module_power_cfg - Get device's power setting
+ * @dev: network device
+ * @params: output parameters
+ * @extack: extended ACK from the Netlink message
+ *
+ * We care only about min_pwr_allowed, max_pwr_allowed and max_pwr_set params.
+ */
+static int
+ice_get_module_power_cfg(struct net_device *dev,
+			 struct ethtool_module_power_params *params,
+			 struct netlink_ext_ack *extack)
+{
+	int min_pwr_allowed, max_pwr_allowed, max_pwr_set, cage_count;
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int idx;
+
+	if (!ice_fw_supports_cmpo(hw)) {
+		NL_SET_ERR_MSG_MOD(extack, "Cage maximum power request is unsupported by the current firmware");
+		return -EOPNOTSUPP;
+	}
+
+	cage_count = ice_get_num_of_cages(hw);
+
+	idx = ice_get_cage_idx(pf, cage_count, extack);
+	if (idx < 0)
+		return idx;
+
+	min_pwr_allowed = ice_get_min_pwr_allowed(hw, extack);
+	if (min_pwr_allowed < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to get min power limit");
+		return min_pwr_allowed;
+	}
+
+	max_pwr_allowed = ice_get_max_pwr_allowed(hw, min_pwr_allowed,
+						  cage_count);
+	if (max_pwr_allowed < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to get max power limit");
+		return max_pwr_allowed;
+	}
+
+	max_pwr_set = ice_get_max_pwr_set(hw, idx);
+	if (max_pwr_set < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to get max power currently set");
+		return max_pwr_set;
+	}
+
+	params->min_pwr_allowed = min_pwr_allowed;
+	params->max_pwr_allowed = max_pwr_allowed;
+	params->max_pwr_set = max_pwr_set;
+
+	return 0;
+}
+
+/**
+ * ice_check_board_pwr_sum - Check if the new sum exceeds the board maximum.
+ * @hw: pointer to the hardware structure
+ * @data: current power config from NVM
+ * @idx: index of the cage we want to update
+ * @power: new power value from ethtool
+ * @cage_count: number of cages in the board
+ * @extack: extended ACK from the Netlink message
+ *
+ * Get the board maximum and the default power value, then add up the
+ * per-cage settings, substituting the requested value for the cage being
+ * updated: if a cage's CMPO enable bit is set, use the value from the
+ * CMPO module; otherwise use the default value.
+ */
+static int
+ice_check_board_pwr_sum(struct ice_hw *hw, struct ice_aqc_nvm_cmpo *data,
+			int idx, u32 power, int cage_count,
+			struct netlink_ext_ack *extack)
+{
+	int board_max_pwr, dflt_pwr, max_pwr_set, sum = 0;
+	u16 temp;
+	int i;
+
+	board_max_pwr = ice_get_board_max_pwr(hw);
+	if (board_max_pwr < 0)
+		return board_max_pwr;
+
+	dflt_pwr = ice_get_dflt_max_pwr(hw);
+	if (dflt_pwr < 0)
+		return dflt_pwr;
+
+	for (i = 0; i < cage_count; i++) {
+		temp = le16_to_cpu(data->cages_cfg[i]);
+
+		/* Skip the cage we want to update; for it we add the new
+		 * power value below, not the one from NVM.
+		 */
+		if (i == idx)
+			continue;
+
+		if (FIELD_GET(ICE_AQC_NVM_CMPO_ENABLE, temp)) {
+			max_pwr_set = FIELD_GET(ICE_AQC_NVM_CMPO_POWER_MASK,
+						temp);
+			sum += ice_pwr_nvm_to_ethtool(max_pwr_set);
+		} else {
+			sum += dflt_pwr;
+		}
+	}
+
+	sum += power;
+
+	if (sum > board_max_pwr) {
+		NL_SET_ERR_MSG_MOD(extack, "Sum of cage power values exceeds the board power budget.");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
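To make ice_check_board_pwr_sum() concrete, here is a standalone sketch with made-up numbers (two cages, all values in mW); the real code pulls the per-cage settings and the board maximum from NVM:

/* Sketch of the board budget check: sum the per-cage settings,
 * substituting the requested value for the cage being updated,
 * and compare against the board maximum. All values in mW.
 */
#include <stdio.h>

#define NUM_CAGES 2

static int board_budget_ok(const unsigned int set_mw[NUM_CAGES],
			   int update_idx, unsigned int new_mw,
			   unsigned int board_max_mw)
{
	unsigned int sum = 0;
	int i;

	for (i = 0; i < NUM_CAGES; i++)
		sum += (i == update_idx) ? new_mw : set_mw[i];

	return sum <= board_max_mw;
}

int main(void)
{
	unsigned int set_mw[NUM_CAGES] = { 1500, 1500 };

	/* raising cage 1 to 3500 mW fits a 5000 mW board budget,
	 * 4000 mW does not: 1500 + 4000 > 5000
	 */
	printf("%d\n", board_budget_ok(set_mw, 1, 3500, 5000));	/* 1 */
	printf("%d\n", board_budget_ok(set_mw, 1, 4000, 5000));	/* 0 */
	return 0;
}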
+/**
+ * ice_set_module_power_cfg - Update device's power setting
+ * @dev: network device
+ * @params: new power config
+ * @extack: extended ACK from the Netlink message
+ *
+ * We care only about max_pwr_set and max_pwr_reset params.
+ */
+static int
+ice_set_module_power_cfg(struct net_device *dev,
+			 const struct ethtool_module_power_params *params,
+			 struct netlink_ext_ack *extack)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_aqc_nvm_cmpo data;
+	struct ice_hw *hw = &pf->hw;
+	int idx, ret, cage_count;
+	u16 power;
+
+	if (params->policy) {
+		NL_SET_ERR_MSG_MOD(extack, "Power policy parameter is not supported.");
+		return -EOPNOTSUPP;
+	}
+
+	if (!ice_fw_supports_cmpo(hw)) {
+		NL_SET_ERR_MSG_MOD(extack, "Cage maximum power request is unsupported by the current firmware.");
+		return -EOPNOTSUPP;
+	}
+
+	if (params->max_pwr_set % 500) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported power resolution; power must be a multiple of 500 mW.");
+		return -EOPNOTSUPP;
+	}
+
+	cage_count = ice_get_num_of_cages(hw);
+
+	idx = ice_get_cage_idx(pf, cage_count, extack);
+	if (idx < 0)
+		return idx;
+
+	ret = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (ret)
+		return ret;
+
+	ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_CMPO_MOD_ID, 0, sizeof(data),
+			      &data, true, false, NULL);
+	if (ret) {
+		ice_release_nvm(hw);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM power config.");
+		return ret;
+	}
+
+	ice_release_nvm(hw);
+
+	power = ice_pwr_ethtool_to_nvm(params->max_pwr_set);
+
+	if (power) {
+		ret = ice_check_board_pwr_sum(hw, &data, idx,
+					      params->max_pwr_set, cage_count,
+					      extack);
+		if (ret)
+			return ret;
+
+		data.cages_cfg[idx] =
+			cpu_to_le16(power & ICE_AQC_NVM_CMPO_POWER_MASK);
+		data.cages_cfg[idx] |= cpu_to_le16(ICE_AQC_NVM_CMPO_ENABLE);
+	} else {
+		data.cages_cfg[idx] &= ~cpu_to_le16(ICE_AQC_NVM_CMPO_ENABLE);
+	}
+
+	ret = ice_acquire_nvm(hw, ICE_RES_WRITE);
+	if (ret)
+		return ret;
+
+	ret = ice_aq_update_nvm(hw, ICE_AQC_NVM_CMPO_MOD_ID, 2,
+				sizeof(data.cages_cfg), data.cages_cfg,
+				false, 0, NULL);
+	if (ret) {
+		ice_release_nvm(hw);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to update NVM power config.");
+		return ret;
+	}
+
+	ret = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR,
+				     NULL);
+	if (ret) {
+		ice_release_nvm(hw);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to save NVM power config.");
+		return ret;
+	}
+
+	ice_release_nvm(hw);
+
+	dev_info(ice_pf_to_dev(pf), "Reboot is required to complete power change.");
+
+	return 0;
+}
+
 static const struct ethtool_ops ice_ethtool_ops = {
 	.cap_rss_ctx_supported = true,
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
@@ -4344,6 +4803,8 @@ static const struct ethtool_ops ice_ethtool_ops = {
 	.set_fecparam = ice_set_fecparam,
 	.get_module_info = ice_get_module_info,
 	.get_module_eeprom = ice_get_module_eeprom,
+	.get_module_power_cfg = ice_get_module_power_cfg,
+	.set_module_power_cfg = ice_set_module_power_cfg,
 };
 
 static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index f0e76f0a6d603..1ccb572ce285d 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -202,11 +202,12 @@ static struct ice_lag *ice_lag_find_primary(struct ice_lag *lag)
  * @act: rule action
  * @recipe_id: recipe id for the new rule
  * @rule_idx: pointer to rule index
+ * @direction: ICE_FLTR_RX or ICE_FLTR_TX
  * @add: boolean on whether we are adding filters
  */
 static int
 ice_lag_cfg_fltr(struct ice_lag *lag, u32 act, u16 recipe_id, u16 *rule_idx,
-		 bool add)
+		 u8 direction, bool add)
 {
 	struct ice_sw_rule_lkup_rx_tx *s_rule;
 	u16 s_rule_sz, vsi_num;
@@ -231,9 +232,16 @@ ice_lag_cfg_fltr(struct ice_lag *lag, u32
act, u16 recipe_id, u16 *rule_idx, act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, vsi_num); - s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); s_rule->recipe_id = cpu_to_le16(recipe_id); - s_rule->src = cpu_to_le16(hw->port_info->lport); + if (direction == ICE_FLTR_RX) { + s_rule->hdr.type = + cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); + s_rule->src = cpu_to_le16(hw->port_info->lport); + } else { + s_rule->hdr.type = + cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); + s_rule->src = cpu_to_le16(vsi_num); + } s_rule->act = cpu_to_le32(act); s_rule->hdr_len = cpu_to_le16(DUMMY_ETH_HDR_LEN); opc = ice_aqc_opc_add_sw_rules; @@ -266,9 +274,27 @@ ice_lag_cfg_dflt_fltr(struct ice_lag *lag, bool add) { u32 act = ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT | ICE_SINGLE_ACT_LAN_ENABLE; + int err; + + err = ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_rx_rule_id, + ICE_FLTR_RX, add); + if (err) + goto err_rx; - return ice_lag_cfg_fltr(lag, act, lag->pf_recipe, - &lag->pf_rule_id, add); + act = ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT | + ICE_SINGLE_ACT_LB_ENABLE; + err = ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_tx_rule_id, + ICE_FLTR_TX, add); + if (err) + goto err_tx; + + return 0; + +err_tx: + ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_rx_rule_id, + ICE_FLTR_RX, !add); +err_rx: + return err; } /** @@ -284,7 +310,7 @@ ice_lag_cfg_drop_fltr(struct ice_lag *lag, bool add) ICE_SINGLE_ACT_DROP; return ice_lag_cfg_fltr(lag, act, lag->lport_recipe, - &lag->lport_rule_idx, add); + &lag->lport_rule_idx, ICE_FLTR_RX, add); } /** @@ -310,7 +336,7 @@ ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) dev = ice_pf_to_dev(lag->pf); /* interface not active - remove old default VSI rule */ - if (bonding_info->slave.state && lag->pf_rule_id) { + if (bonding_info->slave.state && lag->pf_rx_rule_id) { if (ice_lag_cfg_dflt_fltr(lag, false)) dev_err(dev, "Error removing old default VSI filter\n"); if (ice_lag_cfg_drop_fltr(lag, true)) @@ -319,7 +345,7 @@ ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) } /* interface becoming active - add new default VSI rule */ - if (!bonding_info->slave.state && !lag->pf_rule_id) { + if (!bonding_info->slave.state && !lag->pf_rx_rule_id) { if (ice_lag_cfg_dflt_fltr(lag, true)) dev_err(dev, "Error adding new default VSI filter\n"); if (lag->lport_rule_idx && ice_lag_cfg_drop_fltr(lag, false)) @@ -714,8 +740,7 @@ static void ice_lag_move_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport) pf = lag->pf; ice_for_each_vsi(pf, i) - if (pf->vsi[i] && (pf->vsi[i]->type == ICE_VSI_VF || - pf->vsi[i]->type == ICE_VSI_SWITCHDEV_CTRL)) + if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF) ice_lag_move_single_vf_nodes(lag, oldport, newport, i); } @@ -953,8 +978,7 @@ ice_lag_reclaim_vf_nodes(struct ice_lag *lag, struct ice_hw *src_hw) pf = lag->pf; ice_for_each_vsi(pf, i) - if (pf->vsi[i] && (pf->vsi[i]->type == ICE_VSI_VF || - pf->vsi[i]->type == ICE_VSI_SWITCHDEV_CTRL)) + if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF) ice_for_each_traffic_class(tc) ice_lag_reclaim_vf_tc(lag, src_hw, i, tc); } @@ -1976,8 +2000,7 @@ ice_lag_move_vf_nodes_sync(struct ice_lag *lag, struct ice_hw *dest_hw) pf = lag->pf; ice_for_each_vsi(pf, i) - if (pf->vsi[i] && (pf->vsi[i]->type == ICE_VSI_VF || - pf->vsi[i]->type == ICE_VSI_SWITCHDEV_CTRL)) + if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF) ice_for_each_traffic_class(tc) ice_lag_move_vf_nodes_tc_sync(lag, dest_hw, i, tc); @@ -2149,7 +2172,7 @@ void ice_lag_rebuild(struct ice_pf *pf) 
ice_lag_cfg_cp_fltr(lag, true); - if (lag->pf_rule_id) + if (lag->pf_rx_rule_id) if (ice_lag_cfg_dflt_fltr(lag, true)) dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index 183b38792ef22..bab2c83142a1a 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -43,7 +43,8 @@ struct ice_lag { u8 primary:1; /* this is primary */ u16 pf_recipe; u16 lport_recipe; - u16 pf_rule_id; + u16 pf_rx_rule_id; + u16 pf_tx_rule_id; u16 cp_rule_idx; u16 lport_rule_idx; u8 role; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 558422120312b..327de754922c6 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -27,8 +27,6 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type) return "ICE_VSI_CHNL"; case ICE_VSI_LB: return "ICE_VSI_LB"; - case ICE_VSI_SWITCHDEV_CTRL: - return "ICE_VSI_SWITCHDEV_CTRL"; default: return "unknown"; } @@ -144,7 +142,6 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) { switch (vsi->type) { case ICE_VSI_PF: - case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_CTRL: case ICE_VSI_LB: /* a user could change the values of num_[tr]x_desc using @@ -211,21 +208,6 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) max_t(int, vsi->alloc_rxq, vsi->alloc_txq)); break; - case ICE_VSI_SWITCHDEV_CTRL: - /* The number of queues for ctrl VSI is equal to number of PRs - * Each ring is associated to the corresponding VF_PR netdev. - * Tx and Rx rings are always equal - */ - if (vsi->req_txq && vsi->req_rxq) { - vsi->alloc_txq = vsi->req_txq; - vsi->alloc_rxq = vsi->req_rxq; - } else { - vsi->alloc_txq = 1; - vsi->alloc_rxq = 1; - } - - vsi->num_q_vectors = 1; - break; case ICE_VSI_VF: if (vf->num_req_qs) vf->num_vf_qs = vf->num_req_qs; @@ -522,22 +504,6 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) return IRQ_HANDLED; } -static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *data) -{ - struct ice_q_vector *q_vector = (struct ice_q_vector *)data; - struct ice_pf *pf = q_vector->vsi->back; - struct ice_repr *repr; - unsigned long id; - - if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring) - return IRQ_HANDLED; - - xa_for_each(&pf->eswitch.reprs, id, repr) - napi_schedule(&repr->q_vector->napi); - - return IRQ_HANDLED; -} - /** * ice_vsi_alloc_stat_arrays - Allocate statistics arrays * @vsi: VSI pointer @@ -600,10 +566,6 @@ ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_channel *ch) } switch (vsi->type) { - case ICE_VSI_SWITCHDEV_CTRL: - /* Setup eswitch MSIX irq handler for VSI */ - vsi->irq_handler = ice_eswitch_msix_clean_rings; - break; case ICE_VSI_PF: /* Setup default MSIX irq handler for VSI */ vsi->irq_handler = ice_msix_clean_rings; @@ -933,11 +895,6 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) max_rss_size); vsi->rss_lut_type = ICE_LUT_PF; break; - case ICE_VSI_SWITCHDEV_CTRL: - vsi->rss_table_size = ICE_LUT_VSI_SIZE; - vsi->rss_size = min_t(u16, num_online_cpus(), max_rss_size); - vsi->rss_lut_type = ICE_LUT_VSI; - break; case ICE_VSI_VF: /* VF VSI will get a small RSS table. * For VSI_LUT, LUT size should be set to 64 bytes. 
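The paired Rx/Tx default rules added in ice_lag_cfg_dflt_fltr() earlier in the ice_lag.c hunks follow the usual add-then-unwind idiom: if the Tx rule fails, the already-installed Rx rule is removed by calling the helper again with !add. A minimal standalone sketch of that pattern (add_rule()/del_rule() are hypothetical stand-ins, not the driver's API):

#include <stdio.h>

enum rule { RULE_RX, RULE_TX };

/* hypothetical helpers standing in for the driver's filter calls;
 * here the TX add is made to fail so the unwind path runs
 */
static int add_rule(enum rule r) { return r == RULE_TX ? -1 : 0; }
static void del_rule(enum rule r) { (void)r; }

static int cfg_rule_pair(void)
{
	int err = add_rule(RULE_RX);

	if (err)
		return err;

	err = add_rule(RULE_TX);
	if (err)
		del_rule(RULE_RX);	/* unwind the first step */

	return err;
}

int main(void)
{
	printf("%d\n", cfg_rule_pair());	/* -1: TX failed, RX rolled back */
	return 0;
}

Unwinding in the error path means a partial failure never leaves a half-configured default-forwarding state behind.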
@@ -1263,7 +1220,6 @@ static int ice_vsi_init(struct ice_vsi *vsi, u32 vsi_flags) case ICE_VSI_PF: ctxt->flags = ICE_AQ_VSI_TYPE_PF; break; - case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_CHNL: ctxt->flags = ICE_AQ_VSI_TYPE_VMDQ2; break; @@ -2145,7 +2101,6 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) case ICE_VSI_CHNL: case ICE_VSI_LB: case ICE_VSI_PF: - case ICE_VSI_SWITCHDEV_CTRL: max_agg_nodes = ICE_MAX_PF_AGG_NODES; agg_node_id_start = ICE_PF_AGG_NODE_ID_START; agg_node_iter = &pf->pf_agg_node[0]; @@ -2317,7 +2272,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) switch (vsi->type) { case ICE_VSI_CTRL: - case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_PF: ret = ice_vsi_alloc_q_vectors(vsi); if (ret) @@ -2750,8 +2704,7 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) } else { ice_vsi_close(vsi); } - } else if (vsi->type == ICE_VSI_CTRL || - vsi->type == ICE_VSI_SWITCHDEV_CTRL) { + } else if (vsi->type == ICE_VSI_CTRL) { ice_vsi_close(vsi); } } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 33a164fa325ac..1c70551482645 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5321,7 +5321,6 @@ static void ice_shutdown(struct pci_dev *pdev) } } -#ifdef CONFIG_PM /** * ice_prepare_for_shutdown - prep for PCI shutdown * @pf: board private structure @@ -5410,7 +5409,7 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) * Power Management callback to quiesce the device and prepare * for D3 transition. */ -static int __maybe_unused ice_suspend(struct device *dev) +static int ice_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct ice_pf *pf; @@ -5477,7 +5476,7 @@ static int __maybe_unused ice_suspend(struct device *dev) * ice_resume - PM callback for waking up from D3 * @dev: generic device information structure */ -static int __maybe_unused ice_resume(struct device *dev) +static int ice_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); enum ice_reset_req reset_type; @@ -5528,7 +5527,6 @@ static int __maybe_unused ice_resume(struct device *dev) return 0; } -#endif /* CONFIG_PM */ /** * ice_pci_err_detected - warning that PCI error has been detected @@ -5702,7 +5700,7 @@ static const struct pci_device_id ice_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, ice_pci_tbl); -static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume); static const struct pci_error_handlers ice_pci_err_handler = { .error_detected = ice_pci_err_detected, @@ -5717,9 +5715,7 @@ static struct pci_driver ice_driver = { .id_table = ice_pci_tbl, .probe = ice_probe, .remove = ice_remove, -#ifdef CONFIG_PM - .driver.pm = &ice_pm_ops, -#endif /* CONFIG_PM */ + .driver.pm = pm_sleep_ptr(&ice_pm_ops), .shutdown = ice_shutdown, .sriov_configure = ice_sriov_configure, .sriov_get_vf_total_msix = ice_sriov_get_vf_total_msix, @@ -7055,13 +7051,11 @@ int ice_down(struct ice_vsi *vsi) WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state)); - if (vsi->netdev && vsi->type == ICE_VSI_PF) { + if (vsi->netdev) { vlan_err = ice_vsi_del_vlan_zero(vsi); ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); - } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { - ice_eswitch_stop_all_tx_queues(vsi->back); } ice_vsi_dis_irq(vsi); @@ -7544,11 +7538,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) 
goto err_vsi_rebuild; } - err = ice_eswitch_rebuild(pf); - if (err) { - dev_err(dev, "Switchdev rebuild failed: %d\n", err); - goto err_vsi_rebuild; - } + ice_eswitch_rebuild(pf); if (reset_type == ICE_RESET_PFR) { err = ice_rebuild_channels(pf); @@ -7999,12 +7989,9 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - __u16 mode; + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { + __u16 mode = nla_get_u16(attr); - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - mode = nla_get_u16(attr); if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB) return -EINVAL; /* Continue if bridge mode is not being flipped */ diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index d4e05d2cb30c4..f3ef1211acd71 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -18,7 +18,7 @@ * * Read the NVM using the admin queue commands (0x0701) */ -static int +int ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length, void *data, bool last_command, bool read_shadow_ram, struct ice_sq_cd *cd) diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h index 774c2317967d7..63cdc6bdac589 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.h +++ b/drivers/net/ethernet/intel/ice/ice_nvm.h @@ -14,6 +14,9 @@ struct ice_orom_civd_info { int ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access); void ice_release_nvm(struct ice_hw *hw); +int ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, + u16 length, void *data, bool last_command, + bool read_shadow_ram, struct ice_sq_cd *cd); int ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data, bool read_shadow_ram); diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 5f30fb131f74e..2429727d55629 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -41,6 +41,47 @@ ice_repr_get_phys_port_name(struct net_device *netdev, char *buf, size_t len) return 0; } +/** + * ice_repr_inc_tx_stats - increment Tx statistic by one packet + * @repr: repr to increment stats on + * @len: length of the packet + * @xmit_status: value returned by xmit function + */ +void ice_repr_inc_tx_stats(struct ice_repr *repr, unsigned int len, + int xmit_status) +{ + struct ice_repr_pcpu_stats *stats; + + if (unlikely(xmit_status != NET_XMIT_SUCCESS && + xmit_status != NET_XMIT_CN)) { + this_cpu_inc(repr->stats->tx_drops); + return; + } + + stats = this_cpu_ptr(repr->stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += len; + u64_stats_update_end(&stats->syncp); +} + +/** + * ice_repr_inc_rx_stats - increment Rx statistic by one packet + * @netdev: repr netdev to increment stats on + * @len: length of the packet + */ +void ice_repr_inc_rx_stats(struct net_device *netdev, unsigned int len) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + struct ice_repr_pcpu_stats *stats; + + stats = this_cpu_ptr(repr->stats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += len; + u64_stats_update_end(&stats->syncp); +} + /** * ice_repr_get_stats64 - get VF stats for VFPR use * @netdev: pointer to port representor netdev @@ -76,7 +117,7 @@ ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) * 
ice_netdev_to_repr - Get port representor for given netdevice * @netdev: pointer to port representor netdev */ -struct ice_repr *ice_netdev_to_repr(struct net_device *netdev) +struct ice_repr *ice_netdev_to_repr(const struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); @@ -139,38 +180,35 @@ static int ice_repr_stop(struct net_device *netdev) * ice_repr_sp_stats64 - get slow path stats for port representor * @dev: network interface device structure * @stats: netlink stats structure - * - * RX/TX stats are being swapped here to be consistent with VF stats. In slow - * path, port representor receives data when the corresponding VF is sending it - * (and vice versa), TX and RX bytes/packets are effectively swapped on port - * representor. */ static int ice_repr_sp_stats64(const struct net_device *dev, struct rtnl_link_stats64 *stats) { - struct ice_netdev_priv *np = netdev_priv(dev); - int vf_id = np->repr->vf->vf_id; - struct ice_tx_ring *tx_ring; - struct ice_rx_ring *rx_ring; - u64 pkts, bytes; - - tx_ring = np->vsi->tx_rings[vf_id]; - ice_fetch_u64_stats_per_ring(&tx_ring->ring_stats->syncp, - tx_ring->ring_stats->stats, - &pkts, &bytes); - stats->rx_packets = pkts; - stats->rx_bytes = bytes; - - rx_ring = np->vsi->rx_rings[vf_id]; - ice_fetch_u64_stats_per_ring(&rx_ring->ring_stats->syncp, - rx_ring->ring_stats->stats, - &pkts, &bytes); - stats->tx_packets = pkts; - stats->tx_bytes = bytes; - stats->tx_dropped = rx_ring->ring_stats->rx_stats.alloc_page_failed + - rx_ring->ring_stats->rx_stats.alloc_buf_failed; - + struct ice_repr *repr = ice_netdev_to_repr(dev); + int i; + + for_each_possible_cpu(i) { + u64 tbytes, tpkts, tdrops, rbytes, rpkts; + struct ice_repr_pcpu_stats *repr_stats; + unsigned int start; + + repr_stats = per_cpu_ptr(repr->stats, i); + do { + start = u64_stats_fetch_begin(&repr_stats->syncp); + tbytes = repr_stats->tx_bytes; + tpkts = repr_stats->tx_packets; + tdrops = repr_stats->tx_drops; + rbytes = repr_stats->rx_bytes; + rpkts = repr_stats->rx_packets; + } while (u64_stats_fetch_retry(&repr_stats->syncp, start)); + + stats->tx_bytes += tbytes; + stats->tx_packets += tpkts; + stats->tx_dropped += tdrops; + stats->rx_bytes += rbytes; + stats->rx_packets += rpkts; + } return 0; } @@ -291,7 +329,7 @@ static void ice_repr_remove_node(struct devlink_port *devlink_port) */ static void ice_repr_rem(struct ice_repr *repr) { - kfree(repr->q_vector); + free_percpu(repr->stats); free_netdev(repr->netdev); kfree(repr); } @@ -331,7 +369,6 @@ static void ice_repr_set_tx_topology(struct ice_pf *pf) static struct ice_repr * ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) { - struct ice_q_vector *q_vector; struct ice_netdev_priv *np; struct ice_repr *repr; int err; @@ -346,23 +383,22 @@ ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) goto err_alloc; } + repr->stats = netdev_alloc_pcpu_stats(struct ice_repr_pcpu_stats); + if (!repr->stats) { + err = -ENOMEM; + goto err_stats; + } + repr->src_vsi = src_vsi; + repr->id = src_vsi->vsi_num; np = netdev_priv(repr->netdev); np->repr = repr; - q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL); - if (!q_vector) { - err = -ENOMEM; - goto err_alloc_q_vector; - } - repr->q_vector = q_vector; - repr->q_id = repr->id; - ether_addr_copy(repr->parent_mac, parent_mac); return repr; -err_alloc_q_vector: +err_stats: free_netdev(repr->netdev); err_alloc: kfree(repr); @@ -439,15 +475,3 @@ void ice_repr_stop_tx_queues(struct ice_repr *repr) 
netif_carrier_off(repr->netdev); netif_tx_stop_all_queues(repr->netdev); } - -/** - * ice_repr_set_traffic_vsi - set traffic VSI for port representor - * @repr: repr on with VSI will be set - * @vsi: pointer to VSI that will be used by port representor to pass traffic - */ -void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi) -{ - struct ice_netdev_priv *np = netdev_priv(repr->netdev); - - np->vsi = vsi; -} diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index f9aede3157161..cff730b15ca0e 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -6,20 +6,24 @@ #include +struct ice_repr_pcpu_stats { + struct u64_stats_sync syncp; + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; +}; + struct ice_repr { struct ice_vsi *src_vsi; struct ice_vf *vf; - struct ice_q_vector *q_vector; struct net_device *netdev; struct metadata_dst *dst; struct ice_esw_br_port *br_port; - int q_id; + struct ice_repr_pcpu_stats __percpu *stats; u32 id; u8 parent_mac[ETH_ALEN]; -#ifdef CONFIG_ICE_SWITCHDEV - /* info about slow path rule */ - struct ice_rule_query_data sp_rule; -#endif }; struct ice_repr *ice_repr_add_vf(struct ice_vf *vf); @@ -28,10 +32,12 @@ void ice_repr_rem_vf(struct ice_repr *repr); void ice_repr_start_tx_queues(struct ice_repr *repr); void ice_repr_stop_tx_queues(struct ice_repr *repr); -void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi); - -struct ice_repr *ice_netdev_to_repr(struct net_device *netdev); +struct ice_repr *ice_netdev_to_repr(const struct net_device *netdev); bool ice_is_port_repr_netdev(const struct net_device *netdev); struct ice_repr *ice_repr_get_by_vsi(struct ice_vsi *vsi); + +void ice_repr_inc_tx_stats(struct ice_repr *repr, unsigned int len, + int xmit_status); +void ice_repr_inc_rx_stats(struct net_device *netdev, unsigned int len); #endif diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index a958fcf3e6bef..65e1986af7772 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -170,8 +170,6 @@ void ice_free_vfs(struct ice_pf *pf) else dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); - ice_eswitch_reserve_cp_queues(pf, -ice_get_num_vfs(pf)); - mutex_lock(&vfs->table_lock); ice_for_each_vf(pf, bkt, vf) { @@ -897,7 +895,6 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) goto err_unroll_sriov; } - ice_eswitch_reserve_cp_queues(pf, num_vfs); ret = ice_start_vfs(pf); if (ret) { dev_err(dev, "Failed to start %d VFs, err %d\n", num_vfs, ret); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index b4ea935e83005..f1f9d6b7b5c16 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2446,6 +2446,9 @@ static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *fi) fi->lan_en = true; } } + + if (fi->flag & ICE_FLTR_TX_ONLY) + fi->lan_en = false; } /** @@ -3821,6 +3824,7 @@ ice_cfg_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle, bool set, } else if (f_info.flag & ICE_FLTR_TX) { f_info.src_id = ICE_SRC_ID_VSI; f_info.src = hw_vsi_id; + f_info.flag |= ICE_FLTR_TX_ONLY; } f_list_entry.fltr_info = f_info; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 89ffa1b51b5ad..89e6b18975ef2 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -8,8 +8,9 @@ #define ICE_SW_CFG_MAX_BUF_LEN 2048 #define ICE_DFLT_VSI_INVAL 0xff -#define ICE_FLTR_RX BIT(0) -#define ICE_FLTR_TX BIT(1) +#define ICE_FLTR_RX BIT(0) +#define ICE_FLTR_TX BIT(1) +#define ICE_FLTR_TX_ONLY BIT(2) #define ICE_VSI_INVAL_ID 0xffff #define ICE_INVAL_Q_HANDLE 0xFFFF diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 97d41d6ebf1fb..8bb743f78fcb4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1051,8 +1051,7 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) } /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index af955b0e5dc5c..feba314a3fe44 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -365,6 +365,7 @@ struct ice_rx_ring { u8 ptp_rx; #define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) #define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) +#define ICE_RX_FLAGS_MULTIDEV BIT(3) u8 flags; /* CL5 - 5th cacheline starts here */ struct xdp_rxq_info xdp_rxq; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index f8f1d2bdc1be9..df072ce767b1e 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -236,7 +236,16 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, ice_rx_hash_to_skb(rx_ring, rx_desc, skb, ptype); /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_ring->netdev); + if (unlikely(rx_ring->flags & ICE_RX_FLAGS_MULTIDEV)) { + struct net_device *netdev = ice_eswitch_get_target(rx_ring, + rx_desc); + + if (ice_is_port_repr_netdev(netdev)) + ice_repr_inc_rx_stats(netdev, skb->len); + skb->protocol = eth_type_trans(skb, netdev); + } else { + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + } ice_rx_csum(rx_ring, skb, rx_desc, ptype); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 9ff92dba58236..24cec8375c2c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -150,7 +150,6 @@ enum ice_vsi_type { ICE_VSI_CTRL = 3, /* equates to ICE_VSI_PF with 1 queue pair */ ICE_VSI_CHNL = 4, ICE_VSI_LB = 6, - ICE_VSI_SWITCHDEV_CTRL = 7, }; struct ice_link_status { @@ -1151,6 +1150,10 @@ struct ice_aq_get_set_rss_lut_params { #define ICE_FW_API_LINK_OVERRIDE_MIN 5 #define ICE_FW_API_LINK_OVERRIDE_PATCH 2 +#define ICE_FW_CMPO_MAJ 7 +#define ICE_FW_CMPO_MIN 4 +#define ICE_FW_CMPO_PATCH 1 + #define ICE_SR_WORDS_IN_1KB 512 /* AQ API version for LLDP_FILTER_CONTROL */ diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c index 4a6c850d83ac9..7aae7fdcfcdb9 100644 --- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c @@ -72,7 +72,6 @@ void ice_vsi_init_vlan_ops(struct ice_vsi *vsi) switch (vsi->type) { case ICE_VSI_PF: - case ICE_VSI_SWITCHDEV_CTRL: ice_pf_vsi_init_vlan_ops(vsi); break; case ICE_VSI_VF: diff --git 
a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 1857220d27fee..aa81d1162b815 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -555,8 +555,7 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
 	}
 
 	net_prefetch(xdp->data_meta);
-	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
-			       GFP_ATOMIC | __GFP_NOWARN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 6dd7a66bb8979..f940f650cd788 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3005,8 +3005,7 @@ struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
 	/* prefetch first cache line of first page */
 	net_prefetch(va);
 	/* allocate a skb to store the frags */
-	skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE,
-			       GFP_ATOMIC);
+	skb = napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE);
 	if (unlikely(!skb)) {
 		idpf_rx_put_page(rx_buf);
 
@@ -3060,7 +3059,7 @@ static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq,
 	struct sk_buff *skb;
 
 	/* allocate a skb to store the frags */
-	skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC);
+	skb = napi_alloc_skb(&rxq->q_vector->napi, size);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index df76493faa756..3d046b81e507a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -8,6 +8,8 @@
 #include
 #include
 
+#include "virtchnl2_lan_desc.h"
+
 #define IDPF_LARGE_MAX_Q			256
 #define IDPF_MAX_Q				16
 #define IDPF_MIN_Q				2
diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h
index 4a3c4454d25ab..63deb120359cf 100644
--- a/drivers/net/ethernet/intel/idpf/virtchnl2.h
+++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h
@@ -4,6 +4,8 @@
 #ifndef _VIRTCHNL2_H_
 #define _VIRTCHNL2_H_
 
+#include <linux/types.h>
+
 /* All opcodes associated with virtchnl2 are prefixed with virtchnl2 or
  * VIRTCHNL2. Any future opcodes, offloads/capabilities, structures,
  * and defines must be prefixed with virtchnl2 or VIRTCHNL2 to avoid confusion.
@@ -17,8 +19,6 @@
  * must remain unchanged over time, so we specify explicit values for all enums.
  */
 
-#include "virtchnl2_lan_desc.h"
-
 /* This macro is used to generate compilation errors if a structure
  * is not exactly the correct length.
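 * In mainline this boils down to a static_assert() on sizeof(), roughly:
 *
 *	#define VIRTCHNL2_CHECK_STRUCT_LEN(n, X) \
 *		static_assert((n) == sizeof(struct X))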
*/ @@ -555,7 +555,7 @@ VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_queue_reg_chunk); struct virtchnl2_queue_reg_chunks { __le16 num_chunks; u8 pad[6]; - struct virtchnl2_queue_reg_chunk chunks[]; + struct virtchnl2_queue_reg_chunk chunks[] __counted_by_le(num_chunks); }; VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_reg_chunks); @@ -703,7 +703,7 @@ struct virtchnl2_config_tx_queues { __le32 vport_id; __le16 num_qinfo; u8 pad[10]; - struct virtchnl2_txq_info qinfo[]; + struct virtchnl2_txq_info qinfo[] __counted_by_le(num_qinfo); }; VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_config_tx_queues); @@ -782,7 +782,7 @@ struct virtchnl2_config_rx_queues { __le32 vport_id; __le16 num_qinfo; u8 pad[18]; - struct virtchnl2_rxq_info qinfo[]; + struct virtchnl2_rxq_info qinfo[] __counted_by_le(num_qinfo); }; VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_config_rx_queues); @@ -868,7 +868,7 @@ VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_vector_chunk); struct virtchnl2_vector_chunks { __le16 num_vchunks; u8 pad[14]; - struct virtchnl2_vector_chunk vchunks[]; + struct virtchnl2_vector_chunk vchunks[] __counted_by_le(num_vchunks); }; VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_vector_chunks); @@ -912,7 +912,7 @@ struct virtchnl2_rss_lut { __le16 lut_entries_start; __le16 lut_entries; u8 pad[4]; - __le32 lut[]; + __le32 lut[] __counted_by_le(lut_entries); }; VIRTCHNL2_CHECK_STRUCT_LEN(12, virtchnl2_rss_lut); @@ -977,7 +977,7 @@ struct virtchnl2_ptype { u8 ptype_id_8; u8 proto_id_count; __le16 pad; - __le16 proto_id[]; + __le16 proto_id[] __counted_by(proto_id_count); } __packed __aligned(2); VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); @@ -1104,7 +1104,7 @@ struct virtchnl2_rss_key { __le32 vport_id; __le16 key_len; u8 pad; - u8 key_flex[]; + u8 key_flex[] __counted_by_le(key_len); } __packed; VIRTCHNL2_CHECK_STRUCT_LEN(7, virtchnl2_rss_key); @@ -1131,7 +1131,7 @@ VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_chunk); struct virtchnl2_queue_chunks { __le16 num_chunks; u8 pad[6]; - struct virtchnl2_queue_chunk chunks[]; + struct virtchnl2_queue_chunk chunks[] __counted_by_le(num_chunks); }; VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_chunks); @@ -1195,7 +1195,7 @@ struct virtchnl2_queue_vector_maps { __le32 vport_id; __le16 num_qv_maps; u8 pad[10]; - struct virtchnl2_queue_vector qv_maps[]; + struct virtchnl2_queue_vector qv_maps[] __counted_by_le(num_qv_maps); }; VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_vector_maps); @@ -1247,7 +1247,7 @@ struct virtchnl2_mac_addr_list { __le32 vport_id; __le16 num_mac_addr; u8 pad[2]; - struct virtchnl2_mac_addr mac_addr_list[]; + struct virtchnl2_mac_addr mac_addr_list[] __counted_by_le(num_mac_addr); }; VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_mac_addr_list); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a3f100769e399..74a998fcaa6f3 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -106,8 +106,6 @@ static int igb_setup_all_rx_resources(struct igb_adapter *); static void igb_free_all_tx_resources(struct igb_adapter *); static void igb_free_all_rx_resources(struct igb_adapter *); static void igb_setup_mrqc(struct igb_adapter *); -static int igb_probe(struct pci_dev *, const struct pci_device_id *); -static void igb_remove(struct pci_dev *pdev); static void igb_init_queue_configuration(struct igb_adapter *adapter); static int igb_sw_init(struct igb_adapter *); int igb_open(struct net_device *); @@ -178,20 +176,6 @@ static int igb_vf_configure(struct igb_adapter 
*adapter, int vf);
 static int igb_disable_sriov(struct pci_dev *dev, bool reinit);
 #endif
 
-static int igb_suspend(struct device *);
-static int igb_resume(struct device *);
-static int igb_runtime_suspend(struct device *dev);
-static int igb_runtime_resume(struct device *dev);
-static int igb_runtime_idle(struct device *dev);
-#ifdef CONFIG_PM
-static const struct dev_pm_ops igb_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
-	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
-			   igb_runtime_idle)
-};
-#endif
-static void igb_shutdown(struct pci_dev *);
-static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
 #ifdef CONFIG_IGB_DCA
 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
 static struct notifier_block dca_notifier = {
@@ -219,19 +203,6 @@ static const struct pci_error_handlers igb_err_handler = {
 
 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
 
-static struct pci_driver igb_driver = {
-	.name = igb_driver_name,
-	.id_table = igb_pci_tbl,
-	.probe = igb_probe,
-	.remove = igb_remove,
-#ifdef CONFIG_PM
-	.driver.pm = &igb_pm_ops,
-#endif
-	.shutdown = igb_shutdown,
-	.sriov_configure = igb_pci_sriov_configure,
-	.err_handler = &igb_err_handler
-};
-
 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
 MODULE_LICENSE("GPL v2");
@@ -647,6 +618,8 @@ struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
 	return adapter->netdev;
 }
 
+static struct pci_driver igb_driver;
+
 /**
  * igb_init_module - Driver Registration Routine
  *
@@ -9453,12 +9426,12 @@ static void igb_deliver_wake_packet(struct net_device *netdev)
 	netif_rx(skb);
 }
 
-static int __maybe_unused igb_suspend(struct device *dev)
+static int igb_suspend(struct device *dev)
 {
 	return __igb_shutdown(to_pci_dev(dev), NULL, 0);
 }
 
-static int __maybe_unused __igb_resume(struct device *dev, bool rpm)
+static int __igb_resume(struct device *dev, bool rpm)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct net_device *netdev = pci_get_drvdata(pdev);
@@ -9514,12 +9487,12 @@ static int __maybe_unused __igb_resume(struct device *dev, bool rpm)
 	return err;
 }
 
-static int __maybe_unused igb_resume(struct device *dev)
+static int igb_resume(struct device *dev)
 {
 	return __igb_resume(dev, false);
 }
 
-static int __maybe_unused igb_runtime_idle(struct device *dev)
+static int igb_runtime_idle(struct device *dev)
 {
 	struct net_device *netdev = dev_get_drvdata(dev);
 	struct igb_adapter *adapter = netdev_priv(netdev);
@@ -9530,12 +9503,12 @@ static int __maybe_unused igb_runtime_idle(struct device *dev)
 	return -EBUSY;
 }
 
-static int __maybe_unused igb_runtime_suspend(struct device *dev)
+static int igb_runtime_suspend(struct device *dev)
 {
 	return __igb_shutdown(to_pci_dev(dev), NULL, 1);
 }
 
-static int __maybe_unused igb_runtime_resume(struct device *dev)
+static int igb_runtime_resume(struct device *dev)
 {
 	return __igb_resume(dev, true);
 }
@@ -10157,4 +10130,20 @@ static void igb_nfc_filter_restore(struct igb_adapter *adapter)
 	spin_unlock(&adapter->nfc_lock);
 }
+
+static _DEFINE_DEV_PM_OPS(igb_pm_ops, igb_suspend, igb_resume,
+			  igb_runtime_suspend, igb_runtime_resume,
+			  igb_runtime_idle);
+
+static struct pci_driver igb_driver = {
+	.name = igb_driver_name,
+	.id_table = igb_pci_tbl,
+	.probe = igb_probe,
+	.remove = igb_remove,
+	.driver.pm = pm_ptr(&igb_pm_ops),
+	.shutdown = igb_shutdown,
+	.sriov_configure = igb_pci_sriov_configure,
+	.err_handler = &igb_err_handler
+};
+
 /* igb_main.c */
diff --git
a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index b0cf310e6f7bd..40ccd24ffc539 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2470,7 +2470,7 @@ static int igbvf_suspend(struct device *dev_d) return 0; } -static int __maybe_unused igbvf_resume(struct device *dev_d) +static int igbvf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); struct net_device *netdev = pci_get_drvdata(pdev); @@ -2957,7 +2957,7 @@ static const struct pci_device_id igbvf_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, igbvf_pci_tbl); -static SIMPLE_DEV_PM_OPS(igbvf_pm_ops, igbvf_suspend, igbvf_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(igbvf_pm_ops, igbvf_suspend, igbvf_resume); /* PCI Device API Driver */ static struct pci_driver igbvf_driver = { @@ -2965,7 +2965,7 @@ static struct pci_driver igbvf_driver = { .id_table = igbvf_pci_tbl, .probe = igbvf_probe, .remove = igbvf_remove, - .driver.pm = &igbvf_pm_ops, + .driver.pm = pm_sleep_ptr(&igbvf_pm_ops), .shutdown = igbvf_shutdown, .err_handler = &igbvf_err_handler }; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 35ad40a803cb6..d9bd001af7ba4 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2712,8 +2712,7 @@ static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, net_prefetch(xdp->data_meta); - skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&ring->q_vector->napi, totalsize); if (unlikely(!skb)) return NULL; @@ -5930,15 +5929,6 @@ static int __igc_open(struct net_device *netdev, bool resuming) if (err) goto err_req_irq; - /* Notify the stack of the actual queue counts. */ - err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); - if (err) - goto err_set_queues; - - err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); - if (err) - goto err_set_queues; - clear_bit(__IGC_DOWN, &adapter->state); for (i = 0; i < adapter->num_q_vectors; i++) @@ -5959,8 +5949,6 @@ static int __igc_open(struct net_device *netdev, bool resuming) return IGC_SUCCESS; -err_set_queues: - igc_free_irq(adapter); err_req_irq: igc_release_hw_control(adapter); igc_power_down_phy_copper_base(&adapter->hw); @@ -5977,6 +5965,17 @@ static int __igc_open(struct net_device *netdev, bool resuming) int igc_open(struct net_device *netdev) { + struct igc_adapter *adapter = netdev_priv(netdev); + int err; + + /* Notify the stack of the actual queue counts. 
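+	 * netif_set_real_num_queues() sets both the Tx and Rx counts and
+	 * unwinds on failure, so the two separate
+	 * netif_set_real_num_{tx,rx}_queues() calls and their error
+	 * handling collapse into a single call.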
*/ + err = netif_set_real_num_queues(netdev, adapter->num_tx_queues, + adapter->num_rx_queues); + if (err) { + netdev_err(netdev, "error setting real queue count\n"); + return err; + } + return __igc_open(netdev, false); } @@ -7105,8 +7104,7 @@ static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, return 0; } -#ifdef CONFIG_PM -static int __maybe_unused igc_runtime_suspend(struct device *dev) +static int igc_runtime_suspend(struct device *dev) { return __igc_shutdown(to_pci_dev(dev), NULL, 1); } @@ -7141,7 +7139,7 @@ static void igc_deliver_wake_packet(struct net_device *netdev) netif_rx(skb); } -static int __maybe_unused igc_resume(struct device *dev) +static int igc_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -7183,23 +7181,21 @@ static int __maybe_unused igc_resume(struct device *dev) wr32(IGC_WUS, ~0); - rtnl_lock(); - if (!err && netif_running(netdev)) + if (netif_running(netdev)) { err = __igc_open(netdev, true); - - if (!err) - netif_device_attach(netdev); - rtnl_unlock(); + if (!err) + netif_device_attach(netdev); + } return err; } -static int __maybe_unused igc_runtime_resume(struct device *dev) +static int igc_runtime_resume(struct device *dev) { return igc_resume(dev); } -static int __maybe_unused igc_suspend(struct device *dev) +static int igc_suspend(struct device *dev) { return __igc_shutdown(to_pci_dev(dev), NULL, 0); } @@ -7214,7 +7210,6 @@ static int __maybe_unused igc_runtime_idle(struct device *dev) return -EBUSY; } -#endif /* CONFIG_PM */ static void igc_shutdown(struct pci_dev *pdev) { @@ -7329,22 +7324,16 @@ static const struct pci_error_handlers igc_err_handler = { .resume = igc_io_resume, }; -#ifdef CONFIG_PM -static const struct dev_pm_ops igc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume) - SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume, - igc_runtime_idle) -}; -#endif +static _DEFINE_DEV_PM_OPS(igc_pm_ops, igc_suspend, igc_resume, + igc_runtime_suspend, igc_runtime_resume, + igc_runtime_idle); static struct pci_driver igc_driver = { .name = igc_driver_name, .id_table = igc_pci_tbl, .probe = igc_probe, .remove = igc_remove, -#ifdef CONFIG_PM - .driver.pm = &igc_pm_ops, -#endif + .driver.pm = pm_ptr(&igc_pm_ops), .shutdown = igc_shutdown, .err_handler = &igc_err_handler, }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f985252c8c8d6..43e7e75ae18c0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6974,7 +6974,7 @@ int ixgbe_close(struct net_device *netdev) return 0; } -static int __maybe_unused ixgbe_resume(struct device *dev_d) +static int ixgbe_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); @@ -7082,7 +7082,7 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) return 0; } -static int __maybe_unused ixgbe_suspend(struct device *dev_d) +static int ixgbe_suspend(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); int retval; @@ -10061,15 +10061,10 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - int status; - __u16 mode; + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { + __u16 mode = nla_get_u16(attr); + int status = ixgbe_configure_bridge_mode(adapter, mode); - if (nla_type(attr) != IFLA_BRIDGE_MODE) 
- continue; - - mode = nla_get_u16(attr); - status = ixgbe_configure_bridge_mode(adapter, mode); if (status) return status; @@ -11588,14 +11583,14 @@ static const struct pci_error_handlers ixgbe_err_handler = { .resume = ixgbe_io_resume, }; -static SIMPLE_DEV_PM_OPS(ixgbe_pm_ops, ixgbe_suspend, ixgbe_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(ixgbe_pm_ops, ixgbe_suspend, ixgbe_resume); static struct pci_driver ixgbe_driver = { .name = ixgbe_driver_name, .id_table = ixgbe_pci_tbl, .probe = ixgbe_probe, .remove = ixgbe_remove, - .driver.pm = &ixgbe_pm_ops, + .driver.pm = pm_sleep_ptr(&ixgbe_pm_ops), .shutdown = ixgbe_shutdown, .sriov_configure = ixgbe_pci_sriov_configure, .err_handler = &ixgbe_err_handler diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index d34d715c59ebc..397cb773fabbe 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -220,8 +220,7 @@ static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring, net_prefetch(xdp->data_meta); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 9c960017a6de5..3161a13079fe6 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4300,7 +4300,7 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) return 0; } -static int __maybe_unused ixgbevf_suspend(struct device *dev_d) +static int ixgbevf_suspend(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct ixgbevf_adapter *adapter = netdev_priv(netdev); @@ -4317,7 +4317,7 @@ static int __maybe_unused ixgbevf_suspend(struct device *dev_d) return 0; } -static int __maybe_unused ixgbevf_resume(struct device *dev_d) +static int ixgbevf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); struct net_device *netdev = pci_get_drvdata(pdev); @@ -4854,7 +4854,7 @@ static const struct pci_error_handlers ixgbevf_err_handler = { .resume = ixgbevf_io_resume, }; -static SIMPLE_DEV_PM_OPS(ixgbevf_pm_ops, ixgbevf_suspend, ixgbevf_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(ixgbevf_pm_ops, ixgbevf_suspend, ixgbevf_resume); static struct pci_driver ixgbevf_driver = { .name = ixgbevf_driver_name, @@ -4863,7 +4863,7 @@ static struct pci_driver ixgbevf_driver = { .remove = ixgbevf_remove, /* Power Management Hooks */ - .driver.pm = &ixgbevf_pm_ops, + .driver.pm = pm_sleep_ptr(&ixgbevf_pm_ops), .shutdown = ixgbevf_shutdown, .err_handler = &ixgbevf_err_handler diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index eb2a20b5a0d0c..3d801a1a4f701 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1213,10 +1213,8 @@ struct nix_bp_cfg_req { /* bpid_per_chan = 1 assigns separate bp id for each channel */ }; -/* PF can be mapped to either CGX or LBK interface, - * so maximum 64 channels are possible. 
- */ -#define NIX_MAX_BPID_CHAN 64 +/* Maximum channels any single NIX interface can have */ +#define NIX_MAX_BPID_CHAN 256 struct nix_bp_cfg_rsp { struct mbox_msghdr hdr; u16 chan_bpid[NIX_MAX_BPID_CHAN]; /* Channel and bpid mapping */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 72e060cf6b618..e9bf9231b0185 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) continue; lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) { + if (iter >= MAX_LMAC_COUNT) + continue; lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu), iter); rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index e350242bbafba..be709f83f3318 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1657,7 +1657,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz, struct npc_coalesced_kpu_prfl *img_data = NULL; int i = 0, rc = -EINVAL; void __iomem *kpu_prfl_addr; - u16 offset; + u32 offset; img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr; if (le64_to_cpu(img_data->signature) == KPU_SIGN && diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index b40bd0e467514..637b05c79c42c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -450,7 +450,6 @@ static void otx2_pfvf_mbox_handler(struct work_struct *work) struct mbox_msghdr *msg = NULL; int offset, vf_idx, id, err; struct otx2_mbox_dev *mdev; - struct mbox_hdr *req_hdr; struct otx2_mbox *mbox; struct mbox *vf_mbox; struct otx2_nic *pf; @@ -461,9 +460,8 @@ static void otx2_pfvf_mbox_handler(struct work_struct *work) mbox = &pf->mbox_pfvf[0].mbox; mdev = &mbox->dev[vf_idx]; - req_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - offset = ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN); + offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); for (id = 0; id < vf_mbox->num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + mbox->rx_start + @@ -494,7 +492,6 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) struct otx2_nic *pf = vf_mbox->pfvf; struct otx2_mbox_dev *mdev; int offset, id, vf_idx = 0; - struct mbox_hdr *rsp_hdr; struct mbox_msghdr *msg; struct otx2_mbox *mbox; @@ -502,8 +499,7 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) mbox = &pf->mbox_pfvf[0].mbox_up; mdev = &mbox->dev[vf_idx]; - rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + offset = mbox->rx_start + ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); for (id = 0; id < vf_mbox->up_num_msgs; id++) { msg = mdev->mbase + offset; @@ -1933,7 +1929,7 @@ int otx2_open(struct net_device *netdev) * mcam entries are enabled to receive the packets. Hence disable the * packet I/O. 
 */
-	if (err == EIO)
+	if (err == -EIO)
 		goto err_disable_rxtx;
 	else if (err)
 		goto err_tx_stop_queues;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 06592b9f04242..9240cfe25d102 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -28,8 +28,10 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
 			      struct mlx5e_xsk_param *xsk,
 			      struct mlx5_core_dev *mdev)
 {
-	/* AF_XDP doesn't support frames larger than PAGE_SIZE. */
-	if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) {
+	/* AF_XDP doesn't support frames larger than PAGE_SIZE,
+	 * and xsk->chunk_size is limited to 65535 bytes.
+	 */
+	if ((size_t)xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) {
 		mlx5_core_err(mdev, "XSK chunk size %u out of bounds [%u, %lu]\n", xsk->chunk_size,
 			      MLX5E_MIN_XSK_CHUNK_SIZE, PAGE_SIZE);
 		return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 91848eae45655..81e1c1e401f9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4950,10 +4950,7 @@ static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
 	if (!br_spec)
 		return -EINVAL;
 
-	nla_for_each_nested(attr, br_spec, rem) {
-		if (nla_type(attr) != IFLA_BRIDGE_MODE)
-			continue;
-
+	nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) {
 		mode = nla_get_u16(attr);
 		if (mode > BRIDGE_MODE_VEPA)
 			return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index 1b9bc32efd6fa..c5ea1d1d2b035 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -1874,7 +1874,7 @@ int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_
 			 "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n",
 			 addr, vid, vport_num);
 		NL_SET_ERR_MSG_FMT_MOD(extack,
-				       "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n",
+				       "Failed to lookup vlan metadata for MDB (MAC=%pM,vid=%u,vport=%u)\n",
				       addr, vid, vport_num);
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index 77134ca929382..ba303868686a7 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include <linux/rtnetlink.h>
 #include
 
 #include "mlxbf_gige.h"
@@ -492,8 +493,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev)
 {
 	struct mlxbf_gige *priv = platform_get_drvdata(pdev);
 
-	writeq(0, priv->base + MLXBF_GIGE_INT_EN);
-	mlxbf_gige_clean_port(priv);
+	rtnl_lock();
+	netif_device_detach(priv->netdev);
+
+	if (netif_running(priv->netdev))
+		dev_close(priv->netdev);
+
+	rtnl_unlock();
 }
 
 static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 53b150b7ae4e7..3d7841f0ceecf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -581,7 +581,7 @@ EXPORT_SYMBOL(mlxsw_env_reset_module);
 
 int
mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 slot_index, u8 module, - struct ethtool_module_power_mode_params *params, + struct ethtool_module_power_params *params, struct netlink_ext_ack *extack) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h index a197e3ae069c0..979a35e967b38 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h @@ -35,7 +35,7 @@ int mlxsw_env_reset_module(struct net_device *netdev, int mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 slot_index, u8 module, - struct ethtool_module_power_mode_params *params, + struct ethtool_module_power_params *params, struct netlink_ext_ack *extack); int diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index f0ceb196a6cea..df46962e92b43 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -152,7 +152,7 @@ static int mlxsw_m_reset(struct net_device *netdev, u32 *flags) static int mlxsw_m_get_module_power_mode(struct net_device *netdev, - struct ethtool_module_power_mode_params *params, + struct ethtool_module_power_params *params, struct netlink_ext_ack *extack) { struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); @@ -165,7 +165,7 @@ mlxsw_m_get_module_power_mode(struct net_device *netdev, static int mlxsw_m_set_module_power_mode(struct net_device *netdev, - const struct ethtool_module_power_mode_params *params, + const struct ethtool_module_power_params *params, struct netlink_ext_ack *extack) { struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); @@ -182,8 +182,8 @@ static const struct ethtool_ops mlxsw_m_port_ethtool_ops = { .get_module_eeprom = mlxsw_m_get_module_eeprom, .get_module_eeprom_by_page = mlxsw_m_get_module_eeprom_by_page, .reset = mlxsw_m_reset, - .get_module_power_mode = mlxsw_m_get_module_power_mode, - .set_module_power_mode = mlxsw_m_set_module_power_mode, + .get_module_power_cfg = mlxsw_m_get_module_power_mode, + .set_module_power_cfg = mlxsw_m_set_module_power_mode, }; static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 0f29e9c194113..5ea7241bfabad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1214,7 +1214,7 @@ static int mlxsw_sp_reset(struct net_device *dev, u32 *flags) static int mlxsw_sp_get_module_power_mode(struct net_device *dev, - struct ethtool_module_power_mode_params *params, + struct ethtool_module_power_params *params, struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); @@ -1228,7 +1228,7 @@ mlxsw_sp_get_module_power_mode(struct net_device *dev, static int mlxsw_sp_set_module_power_mode(struct net_device *dev, - const struct ethtool_module_power_mode_params *params, + const struct ethtool_module_power_params *params, struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); @@ -1262,8 +1262,8 @@ const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_eth_ctrl_stats = mlxsw_sp_get_eth_ctrl_stats, .get_rmon_stats = mlxsw_sp_get_rmon_stats, .reset = mlxsw_sp_reset, - .get_module_power_mode = mlxsw_sp_get_module_power_mode, - .set_module_power_mode = mlxsw_sp_set_module_power_mode, + .get_module_power_cfg = 
mlxsw_sp_get_module_power_mode, + .set_module_power_cfg = mlxsw_sp_set_module_power_mode, }; struct mlxsw_sp1_port_link_mode { diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index d7c8aa77ec75c..cdc2872ace1b4 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1112,7 +1112,6 @@ MODULE_DEVICE_TABLE(spi, encx24j600_spi_id_table); static struct spi_driver encx24j600_spi_net_driver = { .driver = { .name = DRV_NAME, - .owner = THIS_MODULE, .bus = &spi_bus_type, }, .probe = encx24j600_spi_probe, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index f28e769e6fdad..997cc4fcffdbf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2289,10 +2289,7 @@ static int nfp_net_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { new_ctrl = nn->dp.ctrl; mode = nla_get_u16(attr); if (mode == BRIDGE_MODE_VEPA) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 1d719726f72b2..b7def3b549376 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -662,8 +662,6 @@ struct qed_hwfn { }; struct pci_params { - int pm_cap; - unsigned long mem_start; unsigned long mem_end; unsigned int irq; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c278f8893042b..17f284e9f06d3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -323,8 +323,7 @@ static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev) goto err2; } - cdev->pci_params.pm_cap = pci_find_capability(pdev, PCI_CAP_ID_PM); - if (IS_PF(cdev) && !cdev->pci_params.pm_cap) + if (IS_PF(cdev) && !pdev->pm_cap) DP_NOTICE(cdev, "Cannot find power management capability\n"); rc = dma_set_mask_and_coherent(&cdev->pdev->dev, DMA_BIT_MASK(64)); @@ -1206,7 +1205,6 @@ static void qed_slowpath_task(struct work_struct *work) static int qed_slowpath_wq_start(struct qed_dev *cdev) { struct qed_hwfn *hwfn; - char name[NAME_SIZE]; int i; if (IS_VF(cdev)) @@ -1215,11 +1213,11 @@ static int qed_slowpath_wq_start(struct qed_dev *cdev) for_each_hwfn(cdev, i) { hwfn = &cdev->hwfns[i]; - snprintf(name, NAME_SIZE, "slowpath-%02x:%02x.%02x", - cdev->pdev->bus->number, - PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id); + hwfn->slowpath_wq = alloc_workqueue("slowpath-%02x:%02x.%02x", + 0, 0, cdev->pdev->bus->number, + PCI_SLOT(cdev->pdev->devfn), + hwfn->abs_pf_id); - hwfn->slowpath_wq = alloc_workqueue(name, 0, 0); if (!hwfn->slowpath_wq) { DP_NOTICE(hwfn, "Cannot create slowpath workqueue\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5c879a5c86d70..4ac444eb269ff 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1314,17 +1314,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01); } +static void rtl_dash_loop_wait(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long usecs, int n, bool high) +{ + if (!tp->dash_enabled) + return; 
+ rtl_loop_wait(tp, c, usecs, n, high); +} + +static void rtl_dash_loop_wait_high(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long d, int n) +{ + rtl_dash_loop_wait(tp, c, d, n, true); +} + +static void rtl_dash_loop_wait_low(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long d, int n) +{ + rtl_dash_loop_wait(tp, c, d, n, false); +} + static void rtl8168dp_driver_start(struct rtl8169_private *tp) { r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START); - rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); } static void rtl8168ep_driver_start(struct rtl8169_private *tp) { r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); + rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); } static void rtl8168_driver_start(struct rtl8169_private *tp) @@ -1338,7 +1361,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp) static void rtl8168dp_driver_stop(struct rtl8169_private *tp) { r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP); - rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); } static void rtl8168ep_driver_stop(struct rtl8169_private *tp) @@ -1346,7 +1369,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp) rtl8168ep_stop_cmac(tp); r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); } static void rtl8168_driver_stop(struct rtl8169_private *tp) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index a6fefe675ef15..f55cf09f0783a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -593,7 +593,7 @@ struct mac_device_info { const struct stmmac_mmc_ops *mmc; const struct stmmac_est_ops *est; struct dw_xpcs *xpcs; - struct phylink_pcs *lynx_pcs; /* Lynx external PCS */ + struct phylink_pcs *phylink_pcs; struct mii_regs mii; /* MII register Addresses */ struct mac_link link; void __iomem *pcsr; /* vpointer to device CSRs */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 68f85e4605cba..12b4a80ea3aa1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -479,9 +479,9 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) goto err_dvr_remove; } - stpriv->hw->lynx_pcs = lynx_pcs_create_mdiodev(pcs_bus, 0); - if (IS_ERR(stpriv->hw->lynx_pcs)) { - ret = PTR_ERR(stpriv->hw->lynx_pcs); + stpriv->hw->phylink_pcs = lynx_pcs_create_mdiodev(pcs_bus, 0); + if (IS_ERR(stpriv->hw->phylink_pcs)) { + ret = PTR_ERR(stpriv->hw->phylink_pcs); goto err_dvr_remove; } } @@ -498,7 +498,7 @@ static void socfpga_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); - struct phylink_pcs *pcs = priv->hw->lynx_pcs; + struct phylink_pcs *pcs = priv->hw->phylink_pcs; stmmac_pltfr_remove(pdev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 24cd80490d19c..fe3498e86de9d 
100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -944,10 +944,7 @@ static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, if (priv->hw->xpcs) return &priv->hw->xpcs->pcs; - if (priv->hw->lynx_pcs) - return priv->hw->lynx_pcs; - - return NULL; + return priv->hw->phylink_pcs; } static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, @@ -1221,6 +1218,9 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) priv->phylink_config.type = PHYLINK_NETDEV; priv->phylink_config.mac_managed_pm = true; + /* Stmmac always requires an RX clock for hardware initialization */ + priv->phylink_config.mac_requires_rxc = true; + mdio_bus_data = priv->plat->mdio_bus_data; if (mdio_bus_data) priv->phylink_config.ovr_an_inband = @@ -3411,6 +3411,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) u32 chan; int ret; + /* Make sure RX clock is enabled */ + if (priv->hw->phylink_pcs) + phylink_pcs_pre_init(priv->phylink, priv->hw->phylink_pcs); + /* DMA initialization and SW reset */ ret = stmmac_init_dma_engine(priv); if (ret < 0) { @@ -3960,8 +3964,7 @@ static int __stmmac_open(struct net_device *dev, if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI && (!priv->hw->xpcs || - xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73) && - !priv->hw->lynx_pcs) { + xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73)) { ret = stmmac_init_phy(dev); if (ret) { netdev_err(priv->dev, @@ -5109,9 +5112,8 @@ static struct sk_buff *stmmac_construct_skb_zc(struct stmmac_channel *ch, unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; - skb = __napi_alloc_skb(&ch->rxtx_napi, - xdp->data_end - xdp->data_hard_start, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&ch->rxtx_napi, + xdp->data_end - xdp->data_hard_start); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index d6ce2c9f0a8d8..a1d0935d1ebe4 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -695,6 +695,17 @@ static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + unsigned int ptp_v2_filter; + + ptp_v2_filter = BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ); if (!IS_ENABLED(CONFIG_TI_K3_AM65_CPTS)) return ethtool_op_get_ts_info(ndev, info); @@ -708,7 +719,7 @@ static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_RAW_HARDWARE; info->phc_index = am65_cpts_phc_index(common->cpts); info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); - info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | ptp_v2_filter; return 0; } diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 2939a21ca74f3..7809bb8149341 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -101,6 +101,12 @@ #define AM65_CPSW_PN_TS_CTL_TX_HOST_TS_EN BIT(11) #define 
AM65_CPSW_PN_TS_CTL_MSG_TYPE_EN_SHIFT 16 +#define AM65_CPSW_PN_TS_CTL_RX_ANX_F_EN BIT(0) +#define AM65_CPSW_PN_TS_CTL_RX_VLAN_LT1_EN BIT(1) +#define AM65_CPSW_PN_TS_CTL_RX_VLAN_LT2_EN BIT(2) +#define AM65_CPSW_PN_TS_CTL_RX_ANX_D_EN BIT(3) +#define AM65_CPSW_PN_TS_CTL_RX_ANX_E_EN BIT(9) + /* AM65_CPSW_PORTN_REG_TS_SEQ_LTYPE_REG register fields */ #define AM65_CPSW_PN_TS_SEQ_ID_OFFSET_SHIFT 16 @@ -124,6 +130,11 @@ AM65_CPSW_PN_TS_CTL_TX_ANX_E_EN | \ AM65_CPSW_PN_TS_CTL_TX_ANX_F_EN) +#define AM65_CPSW_TS_RX_ANX_ALL_EN \ + (AM65_CPSW_PN_TS_CTL_RX_ANX_D_EN | \ + AM65_CPSW_PN_TS_CTL_RX_ANX_E_EN | \ + AM65_CPSW_PN_TS_CTL_RX_ANX_F_EN) + #define AM65_CPSW_ALE_AGEOUT_DEFAULT 30 /* Number of TX/RX descriptors */ #define AM65_CPSW_MAX_TX_DESC 500 @@ -749,18 +760,6 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev) return ret; } -static void am65_cpsw_nuss_rx_ts(struct sk_buff *skb, u32 *psdata) -{ - struct skb_shared_hwtstamps *ssh; - u64 ns; - - ns = ((u64)psdata[1] << 32) | psdata[0]; - - ssh = skb_hwtstamps(skb); - memset(ssh, 0, sizeof(*ssh)); - ssh->hwtstamp = ns_to_ktime(ns); -} - /* RX psdata[2] word format - checksum information */ #define AM65_CPSW_RX_PSD_CSUM_ADD GENMASK(15, 0) #define AM65_CPSW_RX_PSD_CSUM_ERR BIT(16) @@ -841,9 +840,6 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, skb->dev = ndev; psdata = cppi5_hdesc_get_psdata(desc_rx); - /* add RX timestamp */ - if (port->rx_ts_enabled) - am65_cpsw_nuss_rx_ts(skb, psdata); csum_info = psdata[2]; dev_dbg(dev, "%s rx csum_info:%#x\n", __func__, csum_info); @@ -856,6 +852,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, ndev_priv = netdev_priv(ndev); am65_cpsw_nuss_set_offload_fwd_mark(skb, ndev_priv->offload_fwd_mark); skb_put(skb, pkt_len); + if (port->rx_ts_enabled) + am65_cpts_rx_timestamp(common->cpts, skb); skb->protocol = eth_type_trans(skb, ndev); am65_cpsw_nuss_rx_csum(skb, csum_info); napi_gro_receive(&common->napi_rx, skb); @@ -1334,7 +1332,6 @@ static int am65_cpsw_nuss_ndo_slave_set_mac_address(struct net_device *ndev, static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) { - struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_port *port = am65_ndev_to_port(ndev); u32 ts_ctrl, seq_id, ts_ctrl_ltype2, ts_vlan_ltype; struct hwtstamp_config cfg; @@ -1358,11 +1355,6 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, case HWTSTAMP_FILTER_NONE: port->rx_ts_enabled = false; break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: @@ -1372,10 +1364,13 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_NTP_ALL: port->rx_ts_enabled = true; - cfg.rx_filter = HWTSTAMP_FILTER_ALL; + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_NTP_ALL: + return -EOPNOTSUPP; default: return -ERANGE; } @@ -1405,6 +1400,10 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, ts_ctrl |= AM65_CPSW_TS_TX_ANX_ALL_EN | AM65_CPSW_PN_TS_CTL_TX_VLAN_LT1_EN; + if (port->rx_ts_enabled) + ts_ctrl |= AM65_CPSW_TS_RX_ANX_ALL_EN | + 
AM65_CPSW_PN_TS_CTL_RX_VLAN_LT1_EN; + writel(seq_id, port->port_base + AM65_CPSW_PORTN_REG_TS_SEQ_LTYPE_REG); writel(ts_vlan_ltype, port->port_base + AM65_CPSW_PORTN_REG_TS_VLAN_LTYPE_REG); @@ -1412,9 +1411,6 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, AM65_CPSW_PORTN_REG_TS_CTL_LTYPE2); writel(ts_ctrl, port->port_base + AM65_CPSW_PORTN_REG_TS_CTL); - /* en/dis RX timestamp */ - am65_cpts_rx_enable(common->cpts, port->rx_ts_enabled); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } @@ -1431,7 +1427,7 @@ static int am65_cpsw_nuss_hwtstamp_get(struct net_device *ndev, cfg.tx_type = port->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; cfg.rx_filter = port->rx_ts_enabled ? - HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; + HWTSTAMP_FILTER_PTP_V2_EVENT : HWTSTAMP_FILTER_NONE; return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index c66618d91c28f..53bfda6c798c0 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -859,29 +859,6 @@ static long am65_cpts_ts_work(struct ptp_clock_info *ptp) return delay; } -/** - * am65_cpts_rx_enable - enable rx timestamping - * @cpts: cpts handle - * @en: enable - * - * This functions enables rx packets timestamping. The CPTS can timestamp all - * rx packets. - */ -void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en) -{ - u32 val; - - mutex_lock(&cpts->ptp_clk_lock); - val = am65_cpts_read32(cpts, control); - if (en) - val |= AM65_CPTS_CONTROL_TSTAMP_EN; - else - val &= ~AM65_CPTS_CONTROL_TSTAMP_EN; - am65_cpts_write32(cpts, val, control); - mutex_unlock(&cpts->ptp_clk_lock); -} -EXPORT_SYMBOL_GPL(am65_cpts_rx_enable); - static int am65_skb_get_mtype_seqid(struct sk_buff *skb, u32 *mtype_seqid) { unsigned int ptp_class = ptp_classify_raw(skb); @@ -906,6 +883,71 @@ static int am65_skb_get_mtype_seqid(struct sk_buff *skb, u32 *mtype_seqid) return 1; } +static u64 am65_cpts_find_rx_ts(struct am65_cpts *cpts, u32 skb_mtype_seqid) +{ + struct list_head *this, *next; + struct am65_cpts_event *event; + unsigned long flags; + u32 mtype_seqid; + u64 ns = 0; + + am65_cpts_fifo_read(cpts); + spin_lock_irqsave(&cpts->lock, flags); + list_for_each_safe(this, next, &cpts->events) { + event = list_entry(this, struct am65_cpts_event, list); + if (time_after(jiffies, event->tmo)) { + list_del_init(&event->list); + list_add(&event->list, &cpts->pool); + continue; + } + + mtype_seqid = event->event1 & + (AM65_CPTS_EVENT_1_MESSAGE_TYPE_MASK | + AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK | + AM65_CPTS_EVENT_1_EVENT_TYPE_MASK); + + if (mtype_seqid == skb_mtype_seqid) { + ns = event->timestamp; + list_del_init(&event->list); + list_add(&event->list, &cpts->pool); + break; + } + } + spin_unlock_irqrestore(&cpts->lock, flags); + + return ns; +} + +void am65_cpts_rx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb) +{ + struct am65_cpts_skb_cb_data *skb_cb = (struct am65_cpts_skb_cb_data *)skb->cb; + struct skb_shared_hwtstamps *ssh; + int ret; + u64 ns; + + /* am65_cpts_rx_timestamp() is called before eth_type_trans(), so + * skb MAC Hdr properties are not configured yet. 
Hence need to + * reset skb MAC header here + */ + skb_reset_mac_header(skb); + ret = am65_skb_get_mtype_seqid(skb, &skb_cb->skb_mtype_seqid); + if (!ret) + return; /* if not PTP class packet */ + + skb_cb->skb_mtype_seqid |= (AM65_CPTS_EV_RX << AM65_CPTS_EVENT_1_EVENT_TYPE_SHIFT); + + dev_dbg(cpts->dev, "%s mtype seqid %08x\n", __func__, skb_cb->skb_mtype_seqid); + + ns = am65_cpts_find_rx_ts(cpts, skb_cb->skb_mtype_seqid); + if (!ns) + return; + + ssh = skb_hwtstamps(skb); + memset(ssh, 0, sizeof(*ssh)); + ssh->hwtstamp = ns_to_ktime(ns); +} +EXPORT_SYMBOL_GPL(am65_cpts_rx_timestamp); + /** * am65_cpts_tx_timestamp - save tx packet for timestamping * @cpts: cpts handle diff --git a/drivers/net/ethernet/ti/am65-cpts.h b/drivers/net/ethernet/ti/am65-cpts.h index 6e14df0be1137..6099d772799da 100644 --- a/drivers/net/ethernet/ti/am65-cpts.h +++ b/drivers/net/ethernet/ti/am65-cpts.h @@ -22,9 +22,9 @@ void am65_cpts_release(struct am65_cpts *cpts); struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, struct device_node *node); int am65_cpts_phc_index(struct am65_cpts *cpts); +void am65_cpts_rx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); void am65_cpts_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); -void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en); u64 am65_cpts_ns_gettime(struct am65_cpts *cpts); int am65_cpts_estf_enable(struct am65_cpts *cpts, int idx, struct am65_cpts_estf_cfg *cfg); @@ -48,17 +48,18 @@ static inline int am65_cpts_phc_index(struct am65_cpts *cpts) return -1; } -static inline void am65_cpts_tx_timestamp(struct am65_cpts *cpts, +static inline void am65_cpts_rx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb) { } -static inline void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, - struct sk_buff *skb) +static inline void am65_cpts_tx_timestamp(struct am65_cpts *cpts, + struct sk_buff *skb) { } -static inline void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en) +static inline void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, + struct sk_buff *skb) { } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 807ead6785511..fa5500decc960 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -359,6 +359,7 @@ * @app2: MM2S/S2MM User Application Field 2. * @app3: MM2S/S2MM User Application Field 3. * @app4: MM2S/S2MM User Application Field 4. + * @skb: Pointer to SKB transferred using DMA */ struct axidma_bd { u32 next; /* Physical address of next buffer descriptor */ @@ -399,7 +400,6 @@ struct skbuf_dma_descriptor { * struct axienet_local - axienet private per device data * @ndev: Pointer for net_device to which it will be attached. * @dev: Pointer to device structure - * @phy_node: Pointer to device node structure * @phylink: Pointer to phylink instance * @phylink_config: phylink configuration settings * @pcs_phy: Reference to PCS/PMA PHY if used @@ -537,7 +537,7 @@ struct axienet_local { }; /** - * struct axiethernet_option - Used to set axi ethernet hardware options + * struct axienet_option - Used to set axi ethernet hardware options * @opt: Option to be set. 
* @reg: Register offset to be written for setting the option * @m_or: Mask to be ORed for setting the option in the register diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 2f07fde361aaf..9ca2643c921e5 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -20,7 +20,14 @@ #define DEFAULT_MDIO_FREQ 2500000 /* 2.5 MHz */ #define DEFAULT_HOST_CLOCK 150000000 /* 150 MHz */ -/* Wait till MDIO interface is ready to accept a new transaction.*/ +/** + * axienet_mdio_wait_until_ready - MDIO wait function + * @lp: Pointer to axienet local data structure. + * + * Return : 0 on success, Negative value on errors + * + * Wait till MDIO interface is ready to accept a new transaction. + */ static int axienet_mdio_wait_until_ready(struct axienet_local *lp) { u32 val; @@ -30,14 +37,24 @@ static int axienet_mdio_wait_until_ready(struct axienet_local *lp) 1, 20000); } -/* Enable the MDIO MDC. Called prior to a read/write operation */ +/** + * axienet_mdio_mdc_enable - MDIO MDC enable function + * @lp: Pointer to axienet local data structure. + * + * Enable the MDIO MDC. Called prior to a read/write operation + */ static void axienet_mdio_mdc_enable(struct axienet_local *lp) { axienet_iow(lp, XAE_MDIO_MC_OFFSET, ((u32)lp->mii_clk_div | XAE_MDIO_MC_MDIOEN_MASK)); } -/* Disable the MDIO MDC. Called after a read/write operation*/ +/** + * axienet_mdio_mdc_disable - MDIO MDC disable function + * @lp: Pointer to axienet local data structure. + * + * Disable the MDIO MDC. Called after a read/write operation + */ static void axienet_mdio_mdc_disable(struct axienet_local *lp) { u32 mc_reg; diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c index 4bd66fdde3674..d0a722d43368f 100644 --- a/drivers/net/pcs/pcs-rzn1-miic.c +++ b/drivers/net/pcs/pcs-rzn1-miic.c @@ -279,10 +279,38 @@ static int miic_validate(struct phylink_pcs *pcs, unsigned long *supported, return -EINVAL; } +static int miic_pre_init(struct phylink_pcs *pcs) +{ + struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs); + struct miic *miic = miic_port->miic; + u32 val, mask; + + /* Start RX clock if required */ + if (pcs->rxc_always_on) { + /* In MII through mode, the clock signals will be driven by the + * external PHY, which might not be initialized yet. Set RMII + * as default mode to ensure that a reference clock signal is + * generated. + */ + miic_port->interface = PHY_INTERFACE_MODE_RMII; + + val = FIELD_PREP(MIIC_CONVCTRL_CONV_MODE, CONV_MODE_RMII) | + FIELD_PREP(MIIC_CONVCTRL_CONV_SPEED, CONV_MODE_100MBPS); + mask = MIIC_CONVCTRL_CONV_MODE | MIIC_CONVCTRL_CONV_SPEED; + + miic_reg_rmw(miic, MIIC_CONVCTRL(miic_port->port), mask, val); + + miic_converter_enable(miic, miic_port->port, 1); + } + + return 0; +} + static const struct phylink_pcs_ops miic_phylink_ops = { .pcs_validate = miic_validate, .pcs_config = miic_config, .pcs_link_up = miic_link_up, + .pcs_pre_init = miic_pre_init, }; struct phylink_pcs *miic_create(struct device *dev, struct device_node *np) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 1df0595c5ba9f..7fddc8306d822 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -76,6 +76,11 @@ config SFP comment "MII PHY device drivers" +config AIR_EN8811H_PHY + tristate "Airoha EN8811H 2.5 Gigabit PHY" + help + Currently supports the Airoha EN8811H PHY. 
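The miic_pre_init() hook above is the PCS half of a new RX-clock handshake: phylink sets pcs->rxc_always_on when the attached MAC declares mac_requires_rxc, and the PCS must then keep a reference clock running before the MAC resets its hardware (the stmmac and phylink hunks elsewhere in this series supply the MAC side). A minimal sketch of a MAC driver opting in; all "mydrv" identifiers are hypothetical, while mac_requires_rxc and phylink_pcs_pre_init() are the interfaces added by this series:

/* Sketch only: a MAC driver opting into the RX-clock handshake. */
static int mydrv_phylink_create(struct mydrv_priv *priv)
{
	/* Declare that hardware init cannot run without an RX clock */
	priv->phylink_config.mac_requires_rxc = true;

	priv->phylink = phylink_create(&priv->phylink_config, priv->fwnode,
				       priv->phy_mode, &mydrv_phylink_mac_ops);
	return PTR_ERR_OR_ZERO(priv->phylink);
}

static int mydrv_hw_setup(struct mydrv_priv *priv)
{
	int ret;

	/* Have the PCS drive a clock before DMA/core reset; phylink sets
	 * pcs->rxc_always_on and then invokes the pcs_pre_init() method,
	 * e.g. miic_pre_init() above.
	 */
	ret = phylink_pcs_pre_init(priv->phylink, priv->pcs);
	if (ret)
		return ret;

	return mydrv_reset_dma(priv); /* hypothetical reset helper */
}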
+ config AMD_PHY tristate "AMD and Altima PHYs" help diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 197acfa0b4126..202ed7f450da6 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -34,6 +34,7 @@ obj-y += $(sfp-obj-y) $(sfp-obj-m) obj-$(CONFIG_ADIN_PHY) += adin.o obj-$(CONFIG_ADIN1100_PHY) += adin1100.o +obj-$(CONFIG_AIR_EN8811H_PHY) += air_en8811h.o obj-$(CONFIG_AMD_PHY) += amd.o obj-$(CONFIG_AQUANTIA_PHY) += aquantia/ ifdef CONFIG_AX88796B_RUST_PHY diff --git a/drivers/net/phy/air_en8811h.c b/drivers/net/phy/air_en8811h.c new file mode 100644 index 0000000000000..720542a4fd82a --- /dev/null +++ b/drivers/net/phy/air_en8811h.c @@ -0,0 +1,1086 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Driver for the Airoha EN8811H 2.5 Gigabit PHY. + * + * Limitations of the EN8811H: + * - Only full duplex supported + * - Forced speed (AN off) is not supported by hardware (100Mbps) + * + * Source originated from airoha's en8811h.c and en8811h.h v1.2.1 + * + * Copyright (C) 2023 Airoha Technology Corp. + */ + +#include +#include +#include +#include +#include + +#define EN8811H_PHY_ID 0x03a2a411 + +#define EN8811H_MD32_DM "airoha/EthMD32.dm.bin" +#define EN8811H_MD32_DSP "airoha/EthMD32.DSP.bin" + +#define AIR_FW_ADDR_DM 0x00000000 +#define AIR_FW_ADDR_DSP 0x00100000 + +/* MII Registers */ +#define AIR_AUX_CTRL_STATUS 0x1d +#define AIR_AUX_CTRL_STATUS_SPEED_MASK GENMASK(4, 2) +#define AIR_AUX_CTRL_STATUS_SPEED_100 0x4 +#define AIR_AUX_CTRL_STATUS_SPEED_1000 0x8 +#define AIR_AUX_CTRL_STATUS_SPEED_2500 0xc + +#define AIR_EXT_PAGE_ACCESS 0x1f +#define AIR_PHY_PAGE_STANDARD 0x0000 +#define AIR_PHY_PAGE_EXTENDED_4 0x0004 + +/* MII Registers Page 4*/ +#define AIR_BPBUS_MODE 0x10 +#define AIR_BPBUS_MODE_ADDR_FIXED 0x0000 +#define AIR_BPBUS_MODE_ADDR_INCR BIT(15) +#define AIR_BPBUS_WR_ADDR_HIGH 0x11 +#define AIR_BPBUS_WR_ADDR_LOW 0x12 +#define AIR_BPBUS_WR_DATA_HIGH 0x13 +#define AIR_BPBUS_WR_DATA_LOW 0x14 +#define AIR_BPBUS_RD_ADDR_HIGH 0x15 +#define AIR_BPBUS_RD_ADDR_LOW 0x16 +#define AIR_BPBUS_RD_DATA_HIGH 0x17 +#define AIR_BPBUS_RD_DATA_LOW 0x18 + +/* Registers on MDIO_MMD_VEND1 */ +#define EN8811H_PHY_FW_STATUS 0x8009 +#define EN8811H_PHY_READY 0x02 + +#define AIR_PHY_MCU_CMD_1 0x800c +#define AIR_PHY_MCU_CMD_1_MODE1 0x0 +#define AIR_PHY_MCU_CMD_2 0x800d +#define AIR_PHY_MCU_CMD_2_MODE1 0x0 +#define AIR_PHY_MCU_CMD_3 0x800e +#define AIR_PHY_MCU_CMD_3_MODE1 0x1101 +#define AIR_PHY_MCU_CMD_3_DOCMD 0x1100 +#define AIR_PHY_MCU_CMD_4 0x800f +#define AIR_PHY_MCU_CMD_4_MODE1 0x0002 +#define AIR_PHY_MCU_CMD_4_INTCLR 0x00e4 + +/* Registers on MDIO_MMD_VEND2 */ +#define AIR_PHY_LED_BCR 0x021 +#define AIR_PHY_LED_BCR_MODE_MASK GENMASK(1, 0) +#define AIR_PHY_LED_BCR_TIME_TEST BIT(2) +#define AIR_PHY_LED_BCR_CLK_EN BIT(3) +#define AIR_PHY_LED_BCR_EXT_CTRL BIT(15) + +#define AIR_PHY_LED_DUR_ON 0x022 + +#define AIR_PHY_LED_DUR_BLINK 0x023 + +#define AIR_PHY_LED_ON(i) (0x024 + ((i) * 2)) +#define AIR_PHY_LED_ON_MASK (GENMASK(6, 0) | BIT(8)) +#define AIR_PHY_LED_ON_LINK1000 BIT(0) +#define AIR_PHY_LED_ON_LINK100 BIT(1) +#define AIR_PHY_LED_ON_LINK10 BIT(2) +#define AIR_PHY_LED_ON_LINKDOWN BIT(3) +#define AIR_PHY_LED_ON_FDX BIT(4) /* Full duplex */ +#define AIR_PHY_LED_ON_HDX BIT(5) /* Half duplex */ +#define AIR_PHY_LED_ON_FORCE_ON BIT(6) +#define AIR_PHY_LED_ON_LINK2500 BIT(8) +#define AIR_PHY_LED_ON_POLARITY BIT(14) +#define AIR_PHY_LED_ON_ENABLE BIT(15) + +#define AIR_PHY_LED_BLINK(i) (0x025 + ((i) * 2)) +#define AIR_PHY_LED_BLINK_1000TX BIT(0) +#define 
AIR_PHY_LED_BLINK_1000RX BIT(1) +#define AIR_PHY_LED_BLINK_100TX BIT(2) +#define AIR_PHY_LED_BLINK_100RX BIT(3) +#define AIR_PHY_LED_BLINK_10TX BIT(4) +#define AIR_PHY_LED_BLINK_10RX BIT(5) +#define AIR_PHY_LED_BLINK_COLLISION BIT(6) +#define AIR_PHY_LED_BLINK_RX_CRC_ERR BIT(7) +#define AIR_PHY_LED_BLINK_RX_IDLE_ERR BIT(8) +#define AIR_PHY_LED_BLINK_FORCE_BLINK BIT(9) +#define AIR_PHY_LED_BLINK_2500TX BIT(10) +#define AIR_PHY_LED_BLINK_2500RX BIT(11) + +/* Registers on BUCKPBUS */ +#define EN8811H_2P5G_LPA 0x3b30 +#define EN8811H_2P5G_LPA_2P5G BIT(0) + +#define EN8811H_FW_VERSION 0x3b3c + +#define EN8811H_POLARITY 0xca0f8 +#define EN8811H_POLARITY_TX_NORMAL BIT(0) +#define EN8811H_POLARITY_RX_REVERSE BIT(1) + +#define EN8811H_GPIO_OUTPUT 0xcf8b8 +#define EN8811H_GPIO_OUTPUT_345 (BIT(3) | BIT(4) | BIT(5)) + +#define EN8811H_FW_CTRL_1 0x0f0018 +#define EN8811H_FW_CTRL_1_START 0x0 +#define EN8811H_FW_CTRL_1_FINISH 0x1 +#define EN8811H_FW_CTRL_2 0x800000 +#define EN8811H_FW_CTRL_2_LOADING BIT(11) + +/* Led definitions */ +#define EN8811H_LED_COUNT 3 + +/* Default LED setup: + * GPIO5 <-> LED0 On: Link detected, blink Rx/Tx + * GPIO4 <-> LED1 On: Link detected at 2500 or 1000 Mbps + * GPIO3 <-> LED2 On: Link detected at 2500 or 100 Mbps + */ +#define AIR_DEFAULT_TRIGGER_LED0 (BIT(TRIGGER_NETDEV_LINK) | \ + BIT(TRIGGER_NETDEV_RX) | \ + BIT(TRIGGER_NETDEV_TX)) +#define AIR_DEFAULT_TRIGGER_LED1 (BIT(TRIGGER_NETDEV_LINK_2500) | \ + BIT(TRIGGER_NETDEV_LINK_1000)) +#define AIR_DEFAULT_TRIGGER_LED2 (BIT(TRIGGER_NETDEV_LINK_2500) | \ + BIT(TRIGGER_NETDEV_LINK_100)) + +struct led { + unsigned long rules; + unsigned long state; +}; + +struct en8811h_priv { + u32 firmware_version; + bool mcu_needs_restart; + struct led led[EN8811H_LED_COUNT]; +}; + +enum { + AIR_PHY_LED_STATE_FORCE_ON, + AIR_PHY_LED_STATE_FORCE_BLINK, +}; + +enum { + AIR_PHY_LED_DUR_BLINK_32MS, + AIR_PHY_LED_DUR_BLINK_64MS, + AIR_PHY_LED_DUR_BLINK_128MS, + AIR_PHY_LED_DUR_BLINK_256MS, + AIR_PHY_LED_DUR_BLINK_512MS, + AIR_PHY_LED_DUR_BLINK_1024MS, +}; + +enum { + AIR_LED_DISABLE, + AIR_LED_ENABLE, +}; + +enum { + AIR_ACTIVE_LOW, + AIR_ACTIVE_HIGH, +}; + +enum { + AIR_LED_MODE_DISABLE, + AIR_LED_MODE_USER_DEFINE, +}; + +#define AIR_PHY_LED_DUR_UNIT 1024 +#define AIR_PHY_LED_DUR (AIR_PHY_LED_DUR_UNIT << AIR_PHY_LED_DUR_BLINK_64MS) + +static const unsigned long en8811h_led_trig = BIT(TRIGGER_NETDEV_FULL_DUPLEX) | + BIT(TRIGGER_NETDEV_LINK) | + BIT(TRIGGER_NETDEV_LINK_10) | + BIT(TRIGGER_NETDEV_LINK_100) | + BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_LINK_2500) | + BIT(TRIGGER_NETDEV_RX) | + BIT(TRIGGER_NETDEV_TX); + +static int air_phy_read_page(struct phy_device *phydev) +{ + return __phy_read(phydev, AIR_EXT_PAGE_ACCESS); +} + +static int air_phy_write_page(struct phy_device *phydev, int page) +{ + return __phy_write(phydev, AIR_EXT_PAGE_ACCESS, page); +} + +static int __air_buckpbus_reg_write(struct phy_device *phydev, + u32 pbus_address, u32 pbus_data) +{ + int ret; + + ret = __phy_write(phydev, AIR_BPBUS_MODE, AIR_BPBUS_MODE_ADDR_FIXED); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_WR_ADDR_HIGH, + upper_16_bits(pbus_address)); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_WR_ADDR_LOW, + lower_16_bits(pbus_address)); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_WR_DATA_HIGH, + upper_16_bits(pbus_data)); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_WR_DATA_LOW, + lower_16_bits(pbus_data)); + if (ret < 0) + return ret; + + return 0; +} + 
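The register map above completes the BUCKPBUS plumbing: the EN8811H's internals (firmware control, polarity, GPIO, 2.5G link-partner status) live in a 32-bit address space reachable only through the page-4 MII registers, with addresses and data split into 16-bit high/low halves. A short usage sketch built on the page-safe wrappers defined just below (air_buckpbus_reg_read()/air_buckpbus_reg_modify()); only the function name here is invented:

/* Sketch: toggle SERDES RX polarity through the BUCKPBUS window. */
static int example_swap_rx_polarity(struct phy_device *phydev)
{
	u32 pol;
	int ret;

	/* 32-bit read through the 16-bit BPBUS register pair on page 4 */
	ret = air_buckpbus_reg_read(phydev, EN8811H_POLARITY, &pol);
	if (ret < 0)
		return ret;

	phydev_dbg(phydev, "POLARITY = 0x%08x\n", pol);

	/* Read-modify-write; the helper skips the write when unchanged */
	return air_buckpbus_reg_modify(phydev, EN8811H_POLARITY,
				       EN8811H_POLARITY_RX_REVERSE,
				       EN8811H_POLARITY_RX_REVERSE);
}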
+static int air_buckpbus_reg_write(struct phy_device *phydev, + u32 pbus_address, u32 pbus_data) +{ + int saved_page; + int ret = 0; + + saved_page = phy_select_page(phydev, AIR_PHY_PAGE_EXTENDED_4); + + if (saved_page >= 0) { + ret = __air_buckpbus_reg_write(phydev, pbus_address, + pbus_data); + if (ret < 0) + phydev_err(phydev, "%s 0x%08x failed: %d\n", __func__, + pbus_address, ret); + } + + return phy_restore_page(phydev, saved_page, ret); +} + +static int __air_buckpbus_reg_read(struct phy_device *phydev, + u32 pbus_address, u32 *pbus_data) +{ + int pbus_data_low, pbus_data_high; + int ret; + + ret = __phy_write(phydev, AIR_BPBUS_MODE, AIR_BPBUS_MODE_ADDR_FIXED); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_RD_ADDR_HIGH, + upper_16_bits(pbus_address)); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_RD_ADDR_LOW, + lower_16_bits(pbus_address)); + if (ret < 0) + return ret; + + pbus_data_high = __phy_read(phydev, AIR_BPBUS_RD_DATA_HIGH); + if (pbus_data_high < 0) + return pbus_data_high; + + pbus_data_low = __phy_read(phydev, AIR_BPBUS_RD_DATA_LOW); + if (pbus_data_low < 0) + return pbus_data_low; + + *pbus_data = pbus_data_low | (pbus_data_high << 16); + return 0; +} + +static int air_buckpbus_reg_read(struct phy_device *phydev, + u32 pbus_address, u32 *pbus_data) +{ + int saved_page; + int ret = 0; + + saved_page = phy_select_page(phydev, AIR_PHY_PAGE_EXTENDED_4); + + if (saved_page >= 0) { + ret = __air_buckpbus_reg_read(phydev, pbus_address, pbus_data); + if (ret < 0) + phydev_err(phydev, "%s 0x%08x failed: %d\n", __func__, + pbus_address, ret); + } + + return phy_restore_page(phydev, saved_page, ret); +} + +static int __air_buckpbus_reg_modify(struct phy_device *phydev, + u32 pbus_address, u32 mask, u32 set) +{ + int pbus_data_low, pbus_data_high; + u32 pbus_data_old, pbus_data_new; + int ret; + + ret = __phy_write(phydev, AIR_BPBUS_MODE, AIR_BPBUS_MODE_ADDR_FIXED); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_RD_ADDR_HIGH, + upper_16_bits(pbus_address)); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_RD_ADDR_LOW, + lower_16_bits(pbus_address)); + if (ret < 0) + return ret; + + pbus_data_high = __phy_read(phydev, AIR_BPBUS_RD_DATA_HIGH); + if (pbus_data_high < 0) + return pbus_data_high; + + pbus_data_low = __phy_read(phydev, AIR_BPBUS_RD_DATA_LOW); + if (pbus_data_low < 0) + return pbus_data_low; + + pbus_data_old = pbus_data_low | (pbus_data_high << 16); + pbus_data_new = (pbus_data_old & ~mask) | set; + if (pbus_data_new == pbus_data_old) + return 0; + + ret = __phy_write(phydev, AIR_BPBUS_WR_ADDR_HIGH, + upper_16_bits(pbus_address)); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_WR_ADDR_LOW, + lower_16_bits(pbus_address)); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_WR_DATA_HIGH, + upper_16_bits(pbus_data_new)); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_WR_DATA_LOW, + lower_16_bits(pbus_data_new)); + if (ret < 0) + return ret; + + return 0; +} + +static int air_buckpbus_reg_modify(struct phy_device *phydev, + u32 pbus_address, u32 mask, u32 set) +{ + int saved_page; + int ret = 0; + + saved_page = phy_select_page(phydev, AIR_PHY_PAGE_EXTENDED_4); + + if (saved_page >= 0) { + ret = __air_buckpbus_reg_modify(phydev, pbus_address, mask, + set); + if (ret < 0) + phydev_err(phydev, "%s 0x%08x failed: %d\n", __func__, + pbus_address, ret); + } + + return phy_restore_page(phydev, saved_page, ret); +} + +static int __air_write_buf(struct phy_device *phydev, u32
address, + const struct firmware *fw) +{ + unsigned int offset; + int ret; + u16 val; + + ret = __phy_write(phydev, AIR_BPBUS_MODE, AIR_BPBUS_MODE_ADDR_INCR); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_WR_ADDR_HIGH, + upper_16_bits(address)); + if (ret < 0) + return ret; + + ret = __phy_write(phydev, AIR_BPBUS_WR_ADDR_LOW, + lower_16_bits(address)); + if (ret < 0) + return ret; + + for (offset = 0; offset < fw->size; offset += 4) { + val = get_unaligned_le16(&fw->data[offset + 2]); + ret = __phy_write(phydev, AIR_BPBUS_WR_DATA_HIGH, val); + if (ret < 0) + return ret; + + val = get_unaligned_le16(&fw->data[offset]); + ret = __phy_write(phydev, AIR_BPBUS_WR_DATA_LOW, val); + if (ret < 0) + return ret; + } + + return 0; +} + +static int air_write_buf(struct phy_device *phydev, u32 address, + const struct firmware *fw) +{ + int saved_page; + int ret = 0; + + saved_page = phy_select_page(phydev, AIR_PHY_PAGE_EXTENDED_4); + + if (saved_page >= 0) { + ret = __air_write_buf(phydev, address, fw); + if (ret < 0) + phydev_err(phydev, "%s 0x%08x failed: %d\n", __func__, + address, ret); + } + + return phy_restore_page(phydev, saved_page, ret); +} + +static int en8811h_wait_mcu_ready(struct phy_device *phydev) +{ + int ret, reg_value; + + /* Because of mdio-lock, may have to wait for multiple loads */ + ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, + EN8811H_PHY_FW_STATUS, reg_value, + reg_value == EN8811H_PHY_READY, + 20000, 7500000, true); + if (ret) { + phydev_err(phydev, "MCU not ready: 0x%x\n", reg_value); + return -ENODEV; + } + + return 0; +} + +static int en8811h_load_firmware(struct phy_device *phydev) +{ + struct en8811h_priv *priv = phydev->priv; + struct device *dev = &phydev->mdio.dev; + const struct firmware *fw1, *fw2; + int ret; + + ret = request_firmware_direct(&fw1, EN8811H_MD32_DM, dev); + if (ret < 0) + return ret; + + ret = request_firmware_direct(&fw2, EN8811H_MD32_DSP, dev); + if (ret < 0) + goto en8811h_load_firmware_rel1; + + ret = air_buckpbus_reg_write(phydev, EN8811H_FW_CTRL_1, + EN8811H_FW_CTRL_1_START); + if (ret < 0) + goto en8811h_load_firmware_out; + + ret = air_buckpbus_reg_modify(phydev, EN8811H_FW_CTRL_2, + EN8811H_FW_CTRL_2_LOADING, + EN8811H_FW_CTRL_2_LOADING); + if (ret < 0) + goto en8811h_load_firmware_out; + + ret = air_write_buf(phydev, AIR_FW_ADDR_DM, fw1); + if (ret < 0) + goto en8811h_load_firmware_out; + + ret = air_write_buf(phydev, AIR_FW_ADDR_DSP, fw2); + if (ret < 0) + goto en8811h_load_firmware_out; + + ret = air_buckpbus_reg_modify(phydev, EN8811H_FW_CTRL_2, + EN8811H_FW_CTRL_2_LOADING, 0); + if (ret < 0) + goto en8811h_load_firmware_out; + + ret = air_buckpbus_reg_write(phydev, EN8811H_FW_CTRL_1, + EN8811H_FW_CTRL_1_FINISH); + if (ret < 0) + goto en8811h_load_firmware_out; + + ret = en8811h_wait_mcu_ready(phydev); + + air_buckpbus_reg_read(phydev, EN8811H_FW_VERSION, + &priv->firmware_version); + phydev_info(phydev, "MD32 firmware version: %08x\n", + priv->firmware_version); + +en8811h_load_firmware_out: + release_firmware(fw2); + +en8811h_load_firmware_rel1: + release_firmware(fw1); + + if (ret < 0) + phydev_err(phydev, "Load firmware failed: %d\n", ret); + + return ret; +} + +static int en8811h_restart_mcu(struct phy_device *phydev) +{ + int ret; + + ret = air_buckpbus_reg_write(phydev, EN8811H_FW_CTRL_1, + EN8811H_FW_CTRL_1_START); + if (ret < 0) + return ret; + + ret = air_buckpbus_reg_write(phydev, EN8811H_FW_CTRL_1, + EN8811H_FW_CTRL_1_FINISH); + if (ret < 0) + return ret; + + return 
en8811h_wait_mcu_ready(phydev); +} + +static int air_hw_led_on_set(struct phy_device *phydev, u8 index, bool on) +{ + struct en8811h_priv *priv = phydev->priv; + bool changed; + + if (index >= EN8811H_LED_COUNT) + return -EINVAL; + + if (on) + changed = !test_and_set_bit(AIR_PHY_LED_STATE_FORCE_ON, + &priv->led[index].state); + else + changed = !!test_and_clear_bit(AIR_PHY_LED_STATE_FORCE_ON, + &priv->led[index].state); + + changed |= (priv->led[index].rules != 0); + + if (changed) + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, + AIR_PHY_LED_ON(index), + AIR_PHY_LED_ON_MASK, + on ? AIR_PHY_LED_ON_FORCE_ON : 0); + + return 0; +} + +static int air_hw_led_blink_set(struct phy_device *phydev, u8 index, + bool blinking) +{ + struct en8811h_priv *priv = phydev->priv; + bool changed; + + if (index >= EN8811H_LED_COUNT) + return -EINVAL; + + if (blinking) + changed = !test_and_set_bit(AIR_PHY_LED_STATE_FORCE_BLINK, + &priv->led[index].state); + else + changed = !!test_and_clear_bit(AIR_PHY_LED_STATE_FORCE_BLINK, + &priv->led[index].state); + + changed |= (priv->led[index].rules != 0); + + if (changed) + return phy_write_mmd(phydev, MDIO_MMD_VEND2, + AIR_PHY_LED_BLINK(index), + blinking ? + AIR_PHY_LED_BLINK_FORCE_BLINK : 0); + else + return 0; +} + +static int air_led_blink_set(struct phy_device *phydev, u8 index, + unsigned long *delay_on, + unsigned long *delay_off) +{ + struct en8811h_priv *priv = phydev->priv; + bool blinking = false; + int err; + + if (index >= EN8811H_LED_COUNT) + return -EINVAL; + + if (delay_on && delay_off && (*delay_on > 0) && (*delay_off > 0)) { + blinking = true; + *delay_on = 50; + *delay_off = 50; + } + + err = air_hw_led_blink_set(phydev, index, blinking); + if (err) + return err; + + /* led-blink set, so switch led-on off */ + err = air_hw_led_on_set(phydev, index, false); + if (err) + return err; + + /* hw-control is off*/ + if (!!test_bit(AIR_PHY_LED_STATE_FORCE_BLINK, &priv->led[index].state)) + priv->led[index].rules = 0; + + return 0; +} + +static int air_led_brightness_set(struct phy_device *phydev, u8 index, + enum led_brightness value) +{ + struct en8811h_priv *priv = phydev->priv; + int err; + + if (index >= EN8811H_LED_COUNT) + return -EINVAL; + + /* led-on set, so switch led-blink off */ + err = air_hw_led_blink_set(phydev, index, false); + if (err) + return err; + + err = air_hw_led_on_set(phydev, index, (value != LED_OFF)); + if (err) + return err; + + /* hw-control is off */ + if (!!test_bit(AIR_PHY_LED_STATE_FORCE_ON, &priv->led[index].state)) + priv->led[index].rules = 0; + + return 0; +} + +static int air_led_hw_control_get(struct phy_device *phydev, u8 index, + unsigned long *rules) +{ + struct en8811h_priv *priv = phydev->priv; + + if (index >= EN8811H_LED_COUNT) + return -EINVAL; + + *rules = priv->led[index].rules; + + return 0; +}; + +static int air_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + struct en8811h_priv *priv = phydev->priv; + u16 on = 0, blink = 0; + int ret; + + if (index >= EN8811H_LED_COUNT) + return -EINVAL; + + priv->led[index].rules = rules; + + if (rules & BIT(TRIGGER_NETDEV_FULL_DUPLEX)) + on |= AIR_PHY_LED_ON_FDX; + + if (rules & (BIT(TRIGGER_NETDEV_LINK_10) | BIT(TRIGGER_NETDEV_LINK))) + on |= AIR_PHY_LED_ON_LINK10; + + if (rules & (BIT(TRIGGER_NETDEV_LINK_100) | BIT(TRIGGER_NETDEV_LINK))) + on |= AIR_PHY_LED_ON_LINK100; + + if (rules & (BIT(TRIGGER_NETDEV_LINK_1000) | BIT(TRIGGER_NETDEV_LINK))) + on |= AIR_PHY_LED_ON_LINK1000; + + if (rules & (BIT(TRIGGER_NETDEV_LINK_2500) | 
BIT(TRIGGER_NETDEV_LINK))) + on |= AIR_PHY_LED_ON_LINK2500; + + if (rules & BIT(TRIGGER_NETDEV_RX)) { + blink |= AIR_PHY_LED_BLINK_10RX | + AIR_PHY_LED_BLINK_100RX | + AIR_PHY_LED_BLINK_1000RX | + AIR_PHY_LED_BLINK_2500RX; + } + + if (rules & BIT(TRIGGER_NETDEV_TX)) { + blink |= AIR_PHY_LED_BLINK_10TX | + AIR_PHY_LED_BLINK_100TX | + AIR_PHY_LED_BLINK_1000TX | + AIR_PHY_LED_BLINK_2500TX; + } + + if (blink || on) { + /* switch hw-control on, so led-on and led-blink are off */ + clear_bit(AIR_PHY_LED_STATE_FORCE_ON, + &priv->led[index].state); + clear_bit(AIR_PHY_LED_STATE_FORCE_BLINK, + &priv->led[index].state); + } else { + priv->led[index].rules = 0; + } + + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, AIR_PHY_LED_ON(index), + AIR_PHY_LED_ON_MASK, on); + + if (ret < 0) + return ret; + + return phy_write_mmd(phydev, MDIO_MMD_VEND2, AIR_PHY_LED_BLINK(index), + blink); +}; + +static int air_led_init(struct phy_device *phydev, u8 index, u8 state, u8 pol) +{ + int val = 0; + int err; + + if (index >= EN8811H_LED_COUNT) + return -EINVAL; + + if (state == AIR_LED_ENABLE) + val |= AIR_PHY_LED_ON_ENABLE; + else + val &= ~AIR_PHY_LED_ON_ENABLE; + + if (pol == AIR_ACTIVE_HIGH) + val |= AIR_PHY_LED_ON_POLARITY; + else + val &= ~AIR_PHY_LED_ON_POLARITY; + + err = phy_modify_mmd(phydev, MDIO_MMD_VEND2, AIR_PHY_LED_ON(index), + AIR_PHY_LED_ON_ENABLE | + AIR_PHY_LED_ON_POLARITY, val); + + if (err < 0) + return err; + + return 0; +} + +static int air_leds_init(struct phy_device *phydev, int num, int dur, int mode) +{ + struct en8811h_priv *priv = phydev->priv; + int ret, i; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, AIR_PHY_LED_DUR_BLINK, + dur); + if (ret < 0) + return ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, AIR_PHY_LED_DUR_ON, + dur >> 1); + if (ret < 0) + return ret; + + switch (mode) { + case AIR_LED_MODE_DISABLE: + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, AIR_PHY_LED_BCR, + AIR_PHY_LED_BCR_EXT_CTRL | + AIR_PHY_LED_BCR_MODE_MASK, 0); + if (ret < 0) + return ret; + break; + case AIR_LED_MODE_USER_DEFINE: + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, AIR_PHY_LED_BCR, + AIR_PHY_LED_BCR_EXT_CTRL | + AIR_PHY_LED_BCR_CLK_EN, + AIR_PHY_LED_BCR_EXT_CTRL | + AIR_PHY_LED_BCR_CLK_EN); + if (ret < 0) + return ret; + break; + default: + phydev_err(phydev, "LED mode %d is not supported\n", mode); + return -EINVAL; + } + + for (i = 0; i < num; ++i) { + ret = air_led_init(phydev, i, AIR_LED_ENABLE, AIR_ACTIVE_HIGH); + if (ret < 0) { + phydev_err(phydev, "LED%d init failed: %d\n", i, ret); + return ret; + } + air_led_hw_control_set(phydev, i, priv->led[i].rules); + } + + return 0; +} + +static int en8811h_led_hw_is_supported(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + if (index >= EN8811H_LED_COUNT) + return -EINVAL; + + /* All combinations of the supported triggers are allowed */ + if (rules & ~en8811h_led_trig) + return -EOPNOTSUPP; + + return 0; +}; + +static int en8811h_probe(struct phy_device *phydev) +{ + struct en8811h_priv *priv; + int ret; + + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(struct en8811h_priv), + GFP_KERNEL); + if (!priv) + return -ENOMEM; + phydev->priv = priv; + + ret = en8811h_load_firmware(phydev); + if (ret < 0) + return ret; + + /* mcu has just restarted after firmware load */ + priv->mcu_needs_restart = false; + + priv->led[0].rules = AIR_DEFAULT_TRIGGER_LED0; + priv->led[1].rules = AIR_DEFAULT_TRIGGER_LED1; + priv->led[2].rules = AIR_DEFAULT_TRIGGER_LED2; + + /* MDIO_DEVS1/2 empty, so set mmds_present bits here */ + 
phydev->c45_ids.mmds_present |= MDIO_DEVS_PMAPMD | MDIO_DEVS_AN; + + ret = air_leds_init(phydev, EN8811H_LED_COUNT, AIR_PHY_LED_DUR, + AIR_LED_MODE_DISABLE); + if (ret < 0) { + phydev_err(phydev, "Failed to disable leds: %d\n", ret); + return ret; + } + + /* Configure led gpio pins as output */ + ret = air_buckpbus_reg_modify(phydev, EN8811H_GPIO_OUTPUT, + EN8811H_GPIO_OUTPUT_345, + EN8811H_GPIO_OUTPUT_345); + if (ret < 0) + return ret; + + return 0; +} + +static int en8811h_config_init(struct phy_device *phydev) +{ + struct en8811h_priv *priv = phydev->priv; + struct device *dev = &phydev->mdio.dev; + u32 pbus_value; + int ret; + + /* If restart happened in .probe(), no need to restart now */ + if (priv->mcu_needs_restart) { + ret = en8811h_restart_mcu(phydev); + if (ret < 0) + return ret; + } else { + /* Next calls to .config_init() mcu needs to restart */ + priv->mcu_needs_restart = true; + } + + /* Select mode 1, the only mode supported. + * Configures the SerDes for 2500Base-X with rate adaptation + */ + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, AIR_PHY_MCU_CMD_1, + AIR_PHY_MCU_CMD_1_MODE1); + if (ret < 0) + return ret; + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, AIR_PHY_MCU_CMD_2, + AIR_PHY_MCU_CMD_2_MODE1); + if (ret < 0) + return ret; + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, AIR_PHY_MCU_CMD_3, + AIR_PHY_MCU_CMD_3_MODE1); + if (ret < 0) + return ret; + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, AIR_PHY_MCU_CMD_4, + AIR_PHY_MCU_CMD_4_MODE1); + if (ret < 0) + return ret; + + /* Serdes polarity */ + pbus_value = 0; + if (device_property_read_bool(dev, "airoha,pnswap-rx")) + pbus_value |= EN8811H_POLARITY_RX_REVERSE; + else + pbus_value &= ~EN8811H_POLARITY_RX_REVERSE; + if (device_property_read_bool(dev, "airoha,pnswap-tx")) + pbus_value &= ~EN8811H_POLARITY_TX_NORMAL; + else + pbus_value |= EN8811H_POLARITY_TX_NORMAL; + ret = air_buckpbus_reg_modify(phydev, EN8811H_POLARITY, + EN8811H_POLARITY_RX_REVERSE | + EN8811H_POLARITY_TX_NORMAL, pbus_value); + if (ret < 0) + return ret; + + ret = air_leds_init(phydev, EN8811H_LED_COUNT, AIR_PHY_LED_DUR, + AIR_LED_MODE_USER_DEFINE); + if (ret < 0) { + phydev_err(phydev, "Failed to initialize leds: %d\n", ret); + return ret; + } + + return 0; +} + +static int en8811h_get_features(struct phy_device *phydev) +{ + linkmode_set_bit_array(phy_basic_ports_array, + ARRAY_SIZE(phy_basic_ports_array), + phydev->supported); + + return genphy_c45_pma_read_abilities(phydev); +} + +static int en8811h_get_rate_matching(struct phy_device *phydev, + phy_interface_t iface) +{ + return RATE_MATCH_PAUSE; +} + +static int en8811h_config_aneg(struct phy_device *phydev) +{ + bool changed = false; + int ret; + u32 adv; + + if (phydev->autoneg == AUTONEG_DISABLE) { + phydev_warn(phydev, "Disabling autoneg is not supported\n"); + return -EINVAL; + } + + adv = linkmode_adv_to_mii_10gbt_adv_t(phydev->advertising); + + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, + MDIO_AN_10GBT_CTRL_ADV2_5G, adv); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + return __genphy_config_aneg(phydev, changed); +} + +static int en8811h_read_status(struct phy_device *phydev) +{ + struct en8811h_priv *priv = phydev->priv; + u32 pbus_value; + int ret, val; + + ret = genphy_update_link(phydev); + if (ret) + return ret; + + phydev->master_slave_get = MASTER_SLAVE_CFG_UNSUPPORTED; + phydev->master_slave_state = MASTER_SLAVE_STATE_UNSUPPORTED; + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + 
phydev->asym_pause = 0; + phydev->rate_matching = RATE_MATCH_PAUSE; + + ret = genphy_read_master_slave(phydev); + if (ret < 0) + return ret; + + ret = genphy_read_lpa(phydev); + if (ret < 0) + return ret; + + /* Get link partner 2.5GBASE-T ability from vendor register */ + ret = air_buckpbus_reg_read(phydev, EN8811H_2P5G_LPA, &pbus_value); + if (ret < 0) + return ret; + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->lp_advertising, + pbus_value & EN8811H_2P5G_LPA_2P5G); + + if (phydev->autoneg_complete) + phy_resolve_aneg_pause(phydev); + + if (!phydev->link) + return 0; + + /* Get real speed from vendor register */ + val = phy_read(phydev, AIR_AUX_CTRL_STATUS); + if (val < 0) + return val; + switch (val & AIR_AUX_CTRL_STATUS_SPEED_MASK) { + case AIR_AUX_CTRL_STATUS_SPEED_2500: + phydev->speed = SPEED_2500; + break; + case AIR_AUX_CTRL_STATUS_SPEED_1000: + phydev->speed = SPEED_1000; + break; + case AIR_AUX_CTRL_STATUS_SPEED_100: + phydev->speed = SPEED_100; + break; + } + + /* Firmware before version 24011202 has no vendor register 2P5G_LPA. + * Assume link partner advertised it if connected at 2500Mbps. + */ + if (priv->firmware_version < 0x24011202) { + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->lp_advertising, + phydev->speed == SPEED_2500); + } + + /* Only supports full duplex */ + phydev->duplex = DUPLEX_FULL; + + return 0; +} + +static int en8811h_clear_intr(struct phy_device *phydev) +{ + int ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, AIR_PHY_MCU_CMD_3, + AIR_PHY_MCU_CMD_3_DOCMD); + if (ret < 0) + return ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, AIR_PHY_MCU_CMD_4, + AIR_PHY_MCU_CMD_4_INTCLR); + if (ret < 0) + return ret; + + return 0; +} + +static irqreturn_t en8811h_handle_interrupt(struct phy_device *phydev) +{ + int ret; + + ret = en8811h_clear_intr(phydev); + if (ret < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + +static struct phy_driver en8811h_driver[] = { +{ + PHY_ID_MATCH_MODEL(EN8811H_PHY_ID), + .name = "Airoha EN8811H", + .probe = en8811h_probe, + .get_features = en8811h_get_features, + .config_init = en8811h_config_init, + .get_rate_matching = en8811h_get_rate_matching, + .config_aneg = en8811h_config_aneg, + .read_status = en8811h_read_status, + .config_intr = en8811h_clear_intr, + .handle_interrupt = en8811h_handle_interrupt, + .led_hw_is_supported = en8811h_led_hw_is_supported, + .read_page = air_phy_read_page, + .write_page = air_phy_write_page, + .led_blink_set = air_led_blink_set, + .led_brightness_set = air_led_brightness_set, + .led_hw_control_set = air_led_hw_control_set, + .led_hw_control_get = air_led_hw_control_get, +} }; + +module_phy_driver(en8811h_driver); + +static struct mdio_device_id __maybe_unused en8811h_tbl[] = { + { PHY_ID_MATCH_MODEL(EN8811H_PHY_ID) }, + { } +}; + +MODULE_DEVICE_TABLE(mdio, en8811h_tbl); +MODULE_FIRMWARE(EN8811H_MD32_DM); +MODULE_FIRMWARE(EN8811H_MD32_DSP); + +MODULE_DESCRIPTION("Airoha EN8811H PHY drivers"); +MODULE_AUTHOR("Airoha"); +MODULE_AUTHOR("Eric Woudstra "); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index c3426a17e6d06..683ebb13bd4f5 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -140,10 +140,11 @@ struct dp83822_private { u16 fx_sd_enable; u8 cfg_dac_minus; u8 cfg_dac_plus; + struct ethtool_wolinfo wol; }; -static int dp83822_set_wol(struct phy_device *phydev, - struct ethtool_wolinfo *wol) +static int 
_dp83822_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) { struct net_device *ndev = phydev->attached_dev; u16 value; @@ -197,10 +198,25 @@ static int dp83822_set_wol(struct phy_device *phydev, MII_DP83822_WOL_CFG, value); } else { return phy_clear_bits_mmd(phydev, DP83822_DEVADDR, - MII_DP83822_WOL_CFG, DP83822_WOL_EN); + MII_DP83822_WOL_CFG, + DP83822_WOL_EN | + DP83822_WOL_MAGIC_EN | + DP83822_WOL_SECURE_ON); } } +static int dp83822_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + struct dp83822_private *dp83822 = phydev->priv; + int ret; + + ret = _dp83822_set_wol(phydev, wol); + if (!ret) + memcpy(&dp83822->wol, wol, sizeof(*wol)); + return ret; +} + static void dp83822_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { @@ -346,13 +362,6 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev) return IRQ_HANDLED; } -static int dp8382x_disable_wol(struct phy_device *phydev) -{ - return phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, - DP83822_WOL_EN | DP83822_WOL_MAGIC_EN | - DP83822_WOL_SECURE_ON); -} - static int dp83822_read_status(struct phy_device *phydev) { struct dp83822_private *dp83822 = phydev->priv; @@ -496,7 +505,7 @@ static int dp83822_config_init(struct phy_device *phydev) return err; } } - return dp8382x_disable_wol(phydev); + return _dp83822_set_wol(phydev, &dp83822->wol); } static int dp83826_config_rmii_mode(struct phy_device *phydev) @@ -575,12 +584,14 @@ static int dp83826_config_init(struct phy_device *phydev) return ret; } - return dp8382x_disable_wol(phydev); + return _dp83822_set_wol(phydev, &dp83822->wol); } static int dp8382x_config_init(struct phy_device *phydev) { - return dp8382x_disable_wol(phydev); + struct dp83822_private *dp83822 = phydev->priv; + + return _dp83822_set_wol(phydev, &dp83822->wol); } static int dp83822_phy_reset(struct phy_device *phydev) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 8b8634600c519..0f8a8ad7ea0bd 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2537,7 +2537,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts, } } -static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) +static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) { struct ptp_header *ptp_header; u32 type; @@ -2547,7 +2547,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) ptp_header = ptp_parse_header(skb, type); skb_pull_inline(skb, ETH_HLEN); + if (!ptp_header) + return false; + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + return true; } static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv, @@ -2559,7 +2563,8 @@ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv, bool ret = false; u16 skb_sig; - lan8814_get_sig_rx(skb, &skb_sig); + if (!lan8814_get_sig_rx(skb, &skb_sig)) + return ret; /* Iterate over all RX timestamps and match it with the received skbs */ spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags); @@ -2834,7 +2839,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm) return 0; } -static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) +static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) { struct ptp_header *ptp_header; u32 type; @@ -2842,7 +2847,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) type = ptp_classify_raw(skb); ptp_header = ptp_parse_header(skb, type); + if (!ptp_header) + return false; + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + return true; } 
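The lan8814 helpers above now report whether a PTP signature could be extracted at all: ptp_parse_header() returns NULL for runt or non-PTP frames, and the old void helpers left *sig holding stale stack data that the match loops then compared against. The pattern, distilled into a standalone sketch (hypothetical name; ptp_classify_raw() and ptp_parse_header() are the stock kernel PTP helpers):

/* Sketch of the guarded-signature pattern this fix establishes. */
static bool example_get_ptp_sig(struct sk_buff *skb, u16 *sig)
{
	struct ptp_header *hdr;
	unsigned int type;

	type = ptp_classify_raw(skb);
	if (type == PTP_CLASS_NONE)
		return false;

	hdr = ptp_parse_header(skb, type);
	if (!hdr)	/* runt/malformed frame, nothing to read */
		return false;

	*sig = ntohs(hdr->sequence_id);
	return true;
}

Callers such as the match loops below then skip an skb on a false return instead of matching a timestamp against uninitialized data.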
static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv, @@ -2856,7 +2865,8 @@ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv, spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags); skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) { - lan8814_get_sig_tx(skb, &skb_sig); + if (!lan8814_get_sig_tx(skb, &skb_sig)) + continue; if (memcmp(&skb_sig, &seq_id, sizeof(seq_id))) continue; @@ -2910,7 +2920,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv, spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags); skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) { - lan8814_get_sig_rx(skb, &skb_sig); + if (!lan8814_get_sig_rx(skb, &skb_sig)) + continue; if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id))) continue; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 503fd7c405235..84a97088dfc6b 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1042,6 +1042,21 @@ static void phylink_pcs_poll_start(struct phylink *pl) mod_timer(&pl->link_poll, jiffies + HZ); } +int phylink_pcs_pre_init(struct phylink *pl, struct phylink_pcs *pcs) +{ + int ret = 0; + + /* Signal to PCS driver that MAC requires RX clock for init */ + if (pl->config->mac_requires_rxc) + pcs->rxc_always_on = true; + + if (pcs->ops->pcs_pre_init) + ret = pcs->ops->pcs_pre_init(pcs); + + return ret; +} +EXPORT_SYMBOL_GPL(phylink_pcs_pre_init); + static void phylink_mac_config(struct phylink *pl, const struct phylink_link_state *state) { @@ -1923,6 +1938,8 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, static int phylink_attach_phy(struct phylink *pl, struct phy_device *phy, phy_interface_t interface) { + u32 flags = 0; + if (WARN_ON(pl->cfg_link_an_mode == MLO_AN_FIXED || (pl->cfg_link_an_mode == MLO_AN_INBAND && phy_interface_mode_is_8023z(interface) && !pl->sfp_bus))) @@ -1931,7 +1948,10 @@ static int phylink_attach_phy(struct phylink *pl, struct phy_device *phy, if (pl->phydev) return -EBUSY; - return phy_attach_direct(pl->netdev, phy, 0, interface); + if (pl->config->mac_requires_rxc) + flags |= PHY_F_RXC_ALWAYS_ON; + + return phy_attach_direct(pl->netdev, phy, flags, interface); } /** @@ -2034,6 +2054,9 @@ int phylink_fwnode_phy_connect(struct phylink *pl, pl->link_config.interface = pl->link_interface; } + if (pl->config->mac_requires_rxc) + flags |= PHY_F_RXC_ALWAYS_ON; + ret = phy_attach_direct(pl->netdev, phy_dev, flags, pl->link_interface); phy_device_free(phy_dev); diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c index e79657f76bea2..c8f83e5f78ab9 100644 --- a/drivers/net/phy/qcom/at803x.c +++ b/drivers/net/phy/qcom/at803x.c @@ -426,7 +426,8 @@ static int at803x_hibernation_mode_config(struct phy_device *phydev) /* The default after hardware reset is hibernation mode enabled. After * software reset, the value is retained. 
*/ - if (!(priv->flags & AT803X_DISABLE_HIBERNATION_MODE)) + if (!(priv->flags & AT803X_DISABLE_HIBERNATION_MODE) && + !(phydev->dev_flags & PHY_F_RXC_ALWAYS_ON)) return 0; return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_HIB_CTRL, diff --git a/drivers/net/team/Makefile b/drivers/net/team/Makefile index f582d81a50911..7a5aa20d286b7 100644 --- a/drivers/net/team/Makefile +++ b/drivers/net/team/Makefile @@ -3,6 +3,7 @@ # Makefile for the network team driver # +team-y:= team_core.o team_nl.o obj-$(CONFIG_NET_TEAM) += team.o obj-$(CONFIG_NET_TEAM_MODE_BROADCAST) += team_mode_broadcast.o obj-$(CONFIG_NET_TEAM_MODE_ROUNDROBIN) += team_mode_roundrobin.o diff --git a/drivers/net/team/team.c b/drivers/net/team/team_core.c similarity index 97% rename from drivers/net/team/team.c rename to drivers/net/team/team_core.c index 0a44bbdcfb7b9..4e3c8d4049571 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team_core.c @@ -27,6 +27,8 @@ #include #include +#include "team_nl.h" + #define DRV_NAME "team" @@ -2254,28 +2256,7 @@ static struct rtnl_link_ops team_link_ops __read_mostly = { static struct genl_family team_nl_family; -static const struct nla_policy team_nl_policy[TEAM_ATTR_MAX + 1] = { - [TEAM_ATTR_UNSPEC] = { .type = NLA_UNSPEC, }, - [TEAM_ATTR_TEAM_IFINDEX] = { .type = NLA_U32 }, - [TEAM_ATTR_LIST_OPTION] = { .type = NLA_NESTED }, - [TEAM_ATTR_LIST_PORT] = { .type = NLA_NESTED }, -}; - -static const struct nla_policy -team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = { - [TEAM_ATTR_OPTION_UNSPEC] = { .type = NLA_UNSPEC, }, - [TEAM_ATTR_OPTION_NAME] = { - .type = NLA_STRING, - .len = TEAM_STRING_MAX_LEN, - }, - [TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG }, - [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 }, - [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY }, - [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 }, - [TEAM_ATTR_OPTION_ARRAY_INDEX] = { .type = NLA_U32 }, -}; - -static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) +int team_nl_noop_doit(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; void *hdr; @@ -2513,7 +2494,7 @@ static int team_nl_send_options_get(struct team *team, u32 portid, u32 seq, return err; } -static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info) +int team_nl_options_get_doit(struct sk_buff *skb, struct genl_info *info) { struct team *team; struct team_option_inst *opt_inst; @@ -2538,7 +2519,7 @@ static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info) static int team_nl_send_event_options_get(struct team *team, struct list_head *sel_opt_inst_list); -static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) +int team_nl_options_set_doit(struct sk_buff *skb, struct genl_info *info) { struct team *team; int err = 0; @@ -2579,7 +2560,7 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested_deprecated(opt_attrs, TEAM_ATTR_OPTION_MAX, nl_option, - team_nl_option_policy, + team_attr_option_nl_policy, info->extack); if (err) goto team_put; @@ -2802,8 +2783,8 @@ static int team_nl_send_port_list_get(struct team *team, u32 portid, u32 seq, return err; } -static int team_nl_cmd_port_list_get(struct sk_buff *skb, - struct genl_info *info) +int team_nl_port_list_get_doit(struct sk_buff *skb, + struct genl_info *info) { struct team *team; int err; @@ -2820,32 +2801,6 @@ static int team_nl_cmd_port_list_get(struct sk_buff *skb, return err; } -static const struct genl_small_ops team_nl_ops[] = { - { - .cmd = 
TEAM_CMD_NOOP, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = team_nl_cmd_noop, - }, - { - .cmd = TEAM_CMD_OPTIONS_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = team_nl_cmd_options_set, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = TEAM_CMD_OPTIONS_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = team_nl_cmd_options_get, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = TEAM_CMD_PORT_LIST_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = team_nl_cmd_port_list_get, - .flags = GENL_ADMIN_PERM, - }, -}; - static const struct genl_multicast_group team_nl_mcgrps[] = { { .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, }, }; @@ -2853,7 +2808,7 @@ static const struct genl_multicast_group team_nl_mcgrps[] = { static struct genl_family team_nl_family __ro_after_init = { .name = TEAM_GENL_NAME, .version = TEAM_GENL_VERSION, - .maxattr = TEAM_ATTR_MAX, + .maxattr = ARRAY_SIZE(team_nl_policy) - 1, .policy = team_nl_policy, .netnsok = true, .module = THIS_MODULE, diff --git a/drivers/net/team/team_nl.c b/drivers/net/team/team_nl.c new file mode 100644 index 0000000000000..208424ab78f5b --- /dev/null +++ b/drivers/net/team/team_nl.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/team.yaml */ +/* YNL-GEN kernel source */ + +#include +#include + +#include "team_nl.h" + +#include + +/* Common nested types */ +const struct nla_policy team_attr_option_nl_policy[TEAM_ATTR_OPTION_ARRAY_INDEX + 1] = { + [TEAM_ATTR_OPTION_NAME] = { .type = NLA_STRING, .len = TEAM_STRING_MAX_LEN, }, + [TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG, }, + [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8, }, + [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY, }, + [TEAM_ATTR_OPTION_REMOVED] = { .type = NLA_FLAG, }, + [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32, }, + [TEAM_ATTR_OPTION_ARRAY_INDEX] = { .type = NLA_U32, }, +}; + +const struct nla_policy team_item_option_nl_policy[TEAM_ATTR_ITEM_OPTION + 1] = { + [TEAM_ATTR_ITEM_OPTION] = NLA_POLICY_NESTED(team_attr_option_nl_policy), +}; + +/* Global operation policy for team */ +const struct nla_policy team_nl_policy[TEAM_ATTR_LIST_OPTION + 1] = { + [TEAM_ATTR_TEAM_IFINDEX] = { .type = NLA_U32, }, + [TEAM_ATTR_LIST_OPTION] = NLA_POLICY_NESTED(team_item_option_nl_policy), +}; + +/* Ops table for team */ +const struct genl_small_ops team_nl_ops[4] = { + { + .cmd = TEAM_CMD_NOOP, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = team_nl_noop_doit, + }, + { + .cmd = TEAM_CMD_OPTIONS_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = team_nl_options_set_doit, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = TEAM_CMD_OPTIONS_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = team_nl_options_get_doit, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = TEAM_CMD_PORT_LIST_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = team_nl_port_list_get_doit, + .flags = GENL_ADMIN_PERM, + }, +}; diff --git a/drivers/net/team/team_nl.h b/drivers/net/team/team_nl.h new file mode 100644 index 0000000000000..c9ec1b22ac4d3 --- /dev/null +++ b/drivers/net/team/team_nl.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/team.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_TEAM_GEN_H +#define _LINUX_TEAM_GEN_H + 
+#include +#include + +#include + +/* Common nested types */ +extern const struct nla_policy team_attr_option_nl_policy[TEAM_ATTR_OPTION_ARRAY_INDEX + 1]; +extern const struct nla_policy team_item_option_nl_policy[TEAM_ATTR_ITEM_OPTION + 1]; + +/* Global operation policy for team */ +extern const struct nla_policy team_nl_policy[TEAM_ATTR_LIST_OPTION + 1]; + +/* Ops table for team */ +extern const struct genl_small_ops team_nl_ops[4]; + +int team_nl_noop_doit(struct sk_buff *skb, struct genl_info *info); +int team_nl_options_set_doit(struct sk_buff *skb, struct genl_info *info); +int team_nl_options_get_doit(struct sk_buff *skb, struct genl_info *info); +int team_nl_port_list_get_doit(struct sk_buff *skb, struct genl_info *info); + +#endif /* _LINUX_TEAM_GEN_H */ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c22d1118a1333..c4a21ec51adf1 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3807,6 +3807,7 @@ static int virtnet_set_rxfh(struct net_device *dev, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); + bool update = false; int i; if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && @@ -3814,13 +3815,24 @@ static int virtnet_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; if (rxfh->indir) { + if (!vi->has_rss) + return -EOPNOTSUPP; + for (i = 0; i < vi->rss_indir_table_size; ++i) vi->ctrl->rss.indirection_table[i] = rxfh->indir[i]; + update = true; } - if (rxfh->key) + + if (rxfh->key) { + if (!vi->has_rss && !vi->has_rss_hash_report) + return -EOPNOTSUPP; + memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size); + update = true; + } - virtnet_commit_rss_command(vi); + if (update) + virtnet_commit_rss_command(vi); return 0; } @@ -4729,13 +4741,15 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; - if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) + if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { vi->has_rss = true; - if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_indir_table_size = virtio_cread16(vdev, offsetof(struct virtio_net_config, rss_max_indirection_table_length)); + } + + if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_key_size = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c index ee4da9ab8013c..a00671b58701f 100644 --- a/drivers/net/wireguard/main.c +++ b/drivers/net/wireguard/main.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include static int __init wg_mod_init(void) diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index 3f72ae943b294..f2aef84fc08d2 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -648,7 +648,6 @@ static struct mhi_driver mhi_mbim_driver = { .id_table = mhi_mbim_id_table, .driver = { .name = "mhi_wwan_mbim", - .owner = THIS_MODULE, }, }; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ad29f370034e4..8d2aee88526c6 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) return NULL; } skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); + skb_mark_for_recycle(skb); /* Align ip header to a 16 bytes boundary */ skb_reserve(skb, NET_IP_ALIGN); diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c index ad3359a4942c7..9c8cde1250fb5 100644 --- 
a/drivers/nfc/nfcmrvl/spi.c +++ b/drivers/nfc/nfcmrvl/spi.c @@ -199,7 +199,6 @@ static struct spi_driver nfcmrvl_spi_driver = { .id_table = nfcmrvl_spi_id_table, .driver = { .name = "nfcmrvl_spi", - .owner = THIS_MODULE, .of_match_table = of_match_ptr(of_nfcmrvl_spi_match), }, }; diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index ed704bb772264..ffe5b4eab457f 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -7,14 +7,13 @@ */ #include -#include +#include #include #include #include #include #include #include -#include #include #include #include @@ -196,7 +195,7 @@ struct st95_digital_cmd_complete_arg { * for spi communication between st95hf and host. * @ddev: nfc digital device object. * @nfcdev: nfc device object. - * @enable_gpio: gpio used to enable st95hf transceiver. + * @enable_gpiod: gpio used to enable st95hf transceiver. * @complete_cb_arg: structure to store various context information * that is passed from nfc requesting thread to the threaded ISR. * @st95hf_supply: regulator "consumer" for NFC device. @@ -219,7 +218,7 @@ struct st95hf_context { struct st95hf_spi_context spicontext; struct nfc_digital_dev *ddev; struct nfc_dev *nfcdev; - unsigned int enable_gpio; + struct gpio_desc *enable_gpiod; struct st95_digital_cmd_complete_arg complete_cb_arg; struct regulator *st95hf_supply; unsigned char sendrcv_trflag; @@ -451,19 +450,19 @@ static int st95hf_select_protocol(struct st95hf_context *stcontext, int type) static void st95hf_send_st95enable_negativepulse(struct st95hf_context *st95con) { /* First make irq_in pin high */ - gpio_set_value(st95con->enable_gpio, HIGH); + gpiod_set_value(st95con->enable_gpiod, HIGH); /* wait for 1 millisecond */ usleep_range(1000, 2000); /* Make irq_in pin low */ - gpio_set_value(st95con->enable_gpio, LOW); + gpiod_set_value(st95con->enable_gpiod, LOW); /* wait for minimum interrupt pulse to make st95 active */ usleep_range(1000, 2000); /* At end make it high */ - gpio_set_value(st95con->enable_gpio, HIGH); + gpiod_set_value(st95con->enable_gpiod, HIGH); } /* @@ -1063,6 +1062,7 @@ MODULE_DEVICE_TABLE(of, st95hf_spi_of_match); static int st95hf_probe(struct spi_device *nfc_spi_dev) { + struct device *dev = &nfc_spi_dev->dev; int ret; struct st95hf_context *st95context; @@ -1108,19 +1108,14 @@ static int st95hf_probe(struct spi_device *nfc_spi_dev) */ dev_set_drvdata(&nfc_spi_dev->dev, spicontext); - st95context->enable_gpio = - of_get_named_gpio(nfc_spi_dev->dev.of_node, - "enable-gpio", - 0); - if (!gpio_is_valid(st95context->enable_gpio)) { + st95context->enable_gpiod = devm_gpiod_get(dev, "enable", GPIOD_OUT_HIGH); + if (IS_ERR(st95context->enable_gpiod)) { + ret = PTR_ERR(st95context->enable_gpiod); dev_err(&nfc_spi_dev->dev, "No valid enable gpio\n"); - ret = st95context->enable_gpio; goto err_disable_regulator; } - ret = devm_gpio_request_one(&nfc_spi_dev->dev, st95context->enable_gpio, - GPIOF_DIR_OUT | GPIOF_INIT_HIGH, - "enable_gpio"); + ret = gpiod_set_consumer_name(st95context->enable_gpiod, "enable_gpio"); if (ret) goto err_disable_regulator; @@ -1242,7 +1237,6 @@ static void st95hf_remove(struct spi_device *nfc_spi_dev) static struct spi_driver st95hf_driver = { .driver = { .name = "st95hf", - .owner = THIS_MODULE, .of_match_table = of_match_ptr(st95hf_spi_of_match), }, .id_table = st95hf_id, diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 2c8e964425dc3..25911b887e5ee 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -14,6 +14,8 @@ #include 
#include #include +#include +#include #include "ism.h" @@ -292,13 +294,15 @@ static int ism_read_local_gid(struct ism_dev *ism) static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb) { clear_bit(dmb->sba_idx, ism->sba_bitmap); - dma_free_coherent(&ism->pdev->dev, dmb->dmb_len, - dmb->cpu_addr, dmb->dma_addr); + dma_unmap_page(&ism->pdev->dev, dmb->dma_addr, dmb->dmb_len, + DMA_FROM_DEVICE); + kfree(dmb->cpu_addr); } static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb) { unsigned long bit; + int rc; if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev)) return -EINVAL; @@ -315,14 +319,27 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb) test_and_set_bit(dmb->sba_idx, ism->sba_bitmap)) return -EINVAL; - dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len, - &dmb->dma_addr, - GFP_KERNEL | __GFP_NOWARN | - __GFP_NOMEMALLOC | __GFP_NORETRY); - if (!dmb->cpu_addr) - clear_bit(dmb->sba_idx, ism->sba_bitmap); + dmb->cpu_addr = kmalloc(dmb->dmb_len, GFP_KERNEL | __GFP_NOWARN | + __GFP_COMP | __GFP_NOMEMALLOC | __GFP_NORETRY); + if (!dmb->cpu_addr) { + rc = -ENOMEM; + goto out_bit; + } + dmb->dma_addr = dma_map_page(&ism->pdev->dev, + virt_to_page(dmb->cpu_addr), 0, + dmb->dmb_len, DMA_FROM_DEVICE); + if (dma_mapping_error(&ism->pdev->dev, dmb->dma_addr)) { + rc = -ENOMEM; + goto out_free; + } + + return 0; - return dmb->cpu_addr ? 0 : -ENOMEM; +out_free: + kfree(dmb->cpu_addr); +out_bit: + clear_bit(dmb->sba_idx, ism->sba_bitmap); + return rc; } int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb, diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 2abaa3a825a9e..a29ba6ef1e275 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -282,6 +282,17 @@ struct ftrace_likely_data { #define __no_sanitize_or_inline __always_inline #endif +/* + * Apply __counted_by() when the Endianness matches to increase test coverage. + */ +#ifdef __LITTLE_ENDIAN +#define __counted_by_le(member) __counted_by(member) +#define __counted_by_be(member) +#else +#define __counted_by_le(member) +#define __counted_by_be(member) __counted_by(member) +#endif + /* Do not trap wrapping arithmetic within an annotated function. */ #ifdef CONFIG_UBSAN_SIGNED_WRAP # define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow"))) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9901e563f706e..74ed8997443a9 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -506,14 +506,22 @@ struct ethtool_module_eeprom { }; /** - * struct ethtool_module_power_mode_params - module power mode parameters + * struct ethtool_module_power_params - module power parameters * @policy: The power mode policy enforced by the host for the plug-in module. * @mode: The operational power mode of the plug-in module. Should be filled by * device drivers on get operations. 
+ * @min_pwr_allowed: minimum power allowed in the cage, as reported by the device + * @max_pwr_allowed: maximum power allowed in the cage, as reported by the device + * @max_pwr_set: maximum power currently set in the cage + * @max_pwr_reset: restore the default maximum power */ -struct ethtool_module_power_mode_params { +struct ethtool_module_power_params { enum ethtool_module_power_mode_policy policy; enum ethtool_module_power_mode mode; + u32 min_pwr_allowed; + u32 max_pwr_allowed; + u32 max_pwr_set; + u8 max_pwr_reset; }; /** @@ -804,11 +812,12 @@ struct ethtool_rxfh_param { * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics. * @get_rmon_stats: Query some of the RMON (RFC 2819) statistics. * Set %ranges to a pointer to zero-terminated array of byte ranges. - * @get_module_power_mode: Get the power mode policy for the plug-in module - * used by the network device and its operational power mode, if - * plugged-in. - * @set_module_power_mode: Set the power mode policy for the plug-in module - * used by the network device. + * @get_module_power_cfg: Get the power configuration for the plug-in module + * used by the network device: its power mode policy, its operational power + * mode (if plugged-in), and the maximum power settings (min and max allowed + * power, plus the max power currently set). + * @set_module_power_cfg: Set the power mode policy and the maximum power for + * the plug-in module used by the network device. * @get_mm: Query the 802.3 MAC Merge layer state. * @set_mm: Set the 802.3 MAC Merge layer parameters. * @get_mm_stats: Query the 802.3 MAC Merge layer statistics. @@ -940,12 +949,12 @@ struct ethtool_ops { void (*get_rmon_stats)(struct net_device *dev, struct ethtool_rmon_stats *rmon_stats, const struct ethtool_rmon_hist_range **ranges); - int (*get_module_power_mode)(struct net_device *dev, - struct ethtool_module_power_mode_params *params, - struct netlink_ext_ack *extack); - int (*set_module_power_mode)(struct net_device *dev, - const struct ethtool_module_power_mode_params *params, - struct netlink_ext_ack *extack); + int (*get_module_power_cfg)(struct net_device *dev, + struct ethtool_module_power_params *params, + struct netlink_ext_ack *extack); + int (*set_module_power_cfg)(struct net_device *dev, + const struct ethtool_module_power_params *params, + struct netlink_ext_ack *extack); int (*get_mm)(struct net_device *dev, struct ethtool_mm_state *state); int (*set_mm)(struct net_device *dev, struct ethtool_mm_cfg *cfg, struct netlink_ext_ack *extack); diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h deleted file mode 100644 index c285968e437a6..0000000000000 --- a/include/linux/genetlink.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_GENERIC_NETLINK_H -#define __LINUX_GENERIC_NETLINK_H - -#include - - -/* All generic netlink requests are serialized by a global lock. 
*/ -extern void genl_lock(void); -extern void genl_unlock(void); - -/* for synchronisation between af_netlink and genetlink */ -extern atomic_t genl_sk_destructing_cnt; -extern wait_queue_head_t genl_sk_destructing_waitq; - -#define MODULE_ALIAS_GENL_FAMILY(family)\ - MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family) - -#endif /* __LINUX_GENERIC_NETLINK_H */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index a419d93789ff3..621b87a87d743 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -15,8 +15,8 @@ #endif #include -#include #include +#include extern int CONCATENATE(GENL_MAGIC_FAMILY, _genl_register)(void); extern void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e41d30ebaca61..890dbbd3c11ec 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3203,6 +3203,7 @@ struct softnet_data { struct softnet_data *rps_ipi_list; #endif + unsigned int received_rps; bool in_net_rx_action; bool in_napi_threaded_poll; @@ -3235,11 +3236,11 @@ struct softnet_data { unsigned int cpu; unsigned int input_queue_tail; #endif - unsigned int received_rps; - unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; + atomic_t dropped ____cacheline_aligned_in_smp; + /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; int defer_count; @@ -3248,21 +3249,6 @@ struct softnet_data { call_single_data_t defer_csd; }; -static inline void input_queue_head_incr(struct softnet_data *sd) -{ -#ifdef CONFIG_RPS - sd->input_queue_head++; -#endif -} - -static inline void input_queue_tail_incr_save(struct softnet_data *sd, - unsigned int *qtail) -{ -#ifdef CONFIG_RPS - *qtail = ++sd->input_queue_tail; -#endif -} - DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); static inline int dev_recursion_level(void) @@ -3270,24 +3256,6 @@ static inline int dev_recursion_level(void) return this_cpu_read(softnet_data.xmit.recursion); } -#define XMIT_RECURSION_LIMIT 8 -static inline bool dev_xmit_recursion(void) -{ - return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > - XMIT_RECURSION_LIMIT); -} - -static inline void dev_xmit_recursion_inc(void) -{ - __this_cpu_inc(softnet_data.xmit.recursion); -} - -static inline void dev_xmit_recursion_dec(void) -{ - __this_cpu_dec(softnet_data.xmit.recursion); -} - -void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); void __netif_schedule(struct Qdisc *q); void netif_schedule_queue(struct netdev_queue *txq); @@ -4128,6 +4096,8 @@ static inline void dev_put(struct net_device *dev) netdev_put(dev, NULL); } +DEFINE_FREE(dev_put, struct net_device *, if (_T) dev_put(_T)) + static inline void netdev_ref_replace(struct net_device *odev, struct net_device *ndev, netdevice_tracker *tracker, diff --git a/include/linux/phy.h b/include/linux/phy.h index 3f68b8239bb11..e6e83304558e0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -778,6 +778,7 @@ struct phy_device { /* Generic phy_device::dev_flags */ #define PHY_F_NO_IRQ 0x80000000 +#define PHY_F_RXC_ALWAYS_ON 0x40000000 static inline struct phy_device *to_phy_device(const struct device *dev) { diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 9a57deefcb078..5ea6b2ad23963 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -138,6 +138,9 @@ enum phylink_op_type { * @poll_fixed_state: if true, starts 
link_poll, * if MAC link is at %MLO_AN_FIXED mode. * @mac_managed_pm: if true, indicate the MAC driver is responsible for PHY PM. + * @mac_requires_rxc: if true, the MAC always requires a receive clock from PHY. + * The PHY driver should start the clock signal as soon as + * possible and avoid stopping it during suspend events. * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. @@ -150,6 +153,7 @@ struct phylink_config { enum phylink_op_type type; bool poll_fixed_state; bool mac_managed_pm; + bool mac_requires_rxc; bool ovr_an_inband; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); @@ -392,6 +396,10 @@ struct phylink_pcs_ops; * @phylink: pointer to &struct phylink_config * @neg_mode: provide PCS neg mode via "mode" argument * @poll: poll the PCS for link changes + * @rxc_always_on: The MAC driver requires the reference clock + * to always be on. Standalone PCS drivers which + * do not have access to a PHY device can check + * this instead of PHY_F_RXC_ALWAYS_ON. * * This structure is designed to be embedded within the PCS private data, * and will be passed between phylink and the PCS. @@ -404,6 +412,7 @@ struct phylink_pcs { struct phylink *phylink; bool neg_mode; bool poll; + bool rxc_always_on; }; /** @@ -418,6 +427,8 @@ struct phylink_pcs { * @pcs_an_restart: restart 802.3z BaseX autonegotiation. * @pcs_link_up: program the PCS for the resolved link configuration * (where necessary). + * @pcs_pre_init: configure PCS components necessary for MAC hardware + * initialization e.g. RX clock for stmmac. */ struct phylink_pcs_ops { int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, @@ -437,6 +448,7 @@ struct phylink_pcs_ops { void (*pcs_an_restart)(struct phylink_pcs *pcs); void (*pcs_link_up)(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); + int (*pcs_pre_init)(struct phylink_pcs *pcs); }; #if 0 /* For kernel-doc purposes only. */ @@ -542,6 +554,34 @@ void pcs_an_restart(struct phylink_pcs *pcs); */ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); + +/** + * pcs_pre_init() - Configure PCS components necessary for MAC initialization + * @pcs: a pointer to a &struct phylink_pcs. + * + * This function can be called by MAC drivers through the + * phylink_pcs_pre_init() wrapper, before their hardware is initialized. It + * should not be called after the link is brought up, as reconfiguring the PCS + * at this point could break the link. + * + * Some MAC devices require specific hardware initialization to be performed by + * their associated PCS device before they can properly initialize their own + * hardware. An example of this is the initialization of stmmac controllers, + * which requires an active REF_CLK signal to be provided by the PHY/PCS. + * + * By calling phylink_pcs_pre_init(), MAC drivers can ensure that the PCS is + * setup in a way that allows for successful hardware initialization. + * + * The specific configuration performed by pcs_pre_init() is dependent on the + * model of PCS and the requirements of the MAC device attached to it. PCS + * driver authors should consider whether their target device is to be used in + * conjunction with a MAC device whose driver calls phylink_pcs_pre_init(). MAC + * driver authors should document their requirements for the PCS + * pre-initialization. 
+ * + */ +int pcs_pre_init(struct phylink_pcs *pcs); + #endif struct phylink *phylink_create(struct phylink_config *, @@ -561,6 +601,8 @@ void phylink_disconnect_phy(struct phylink *); void phylink_mac_change(struct phylink *, bool up); void phylink_pcs_change(struct phylink_pcs *, bool up); +int phylink_pcs_pre_init(struct phylink *pl, struct phylink_pcs *pcs); + void phylink_start(struct phylink *); void phylink_stop(struct phylink *); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index cdfc897f1e3c6..a7da7dfc06a2a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -7,6 +7,7 @@ #include #include #include +#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -46,6 +47,8 @@ extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); +DEFINE_LOCK_GUARD_0(rtnl, rtnl_lock(), rtnl_unlock()) + extern wait_queue_head_t netdev_unregistering_wq; extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 517e546a120ae..03ea36a82cdd7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3350,13 +3350,7 @@ static inline void *napi_alloc_frag_align(unsigned int fragsz, return __napi_alloc_frag_align(fragsz, -align); } -struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, - unsigned int length, gfp_t gfp_mask); -static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, - unsigned int length) -{ - return __napi_alloc_skb(napi, length, GFP_ATOMIC); -} +struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length); void napi_consume_skb(struct sk_buff *skb, int budget); void napi_skb_free_stolen_head(struct sk_buff *skb); @@ -3516,25 +3510,25 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog); -bool napi_pp_put_page(struct page *page, bool napi_safe); +bool napi_pp_put_page(struct page *page); static inline void -skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe) +skb_page_unref(const struct sk_buff *skb, struct page *page) { #ifdef CONFIG_PAGE_POOL - if (skb->pp_recycle && napi_pp_put_page(page, napi_safe)) + if (skb->pp_recycle && napi_pp_put_page(page)) return; #endif put_page(page); } static inline void -napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) +napi_frag_unref(skb_frag_t *frag, bool recycle) { struct page *page = skb_frag_page(frag); #ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page, napi_safe)) + if (recycle && napi_pp_put_page(page)) return; #endif put_page(page); @@ -3550,7 +3544,7 @@ napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) */ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) { - napi_frag_unref(frag, recycle, false); + napi_frag_unref(frag, recycle); } /** diff --git a/include/linux/udp.h b/include/linux/udp.h index 3748e82b627b7..17539d0896661 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -150,6 +150,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if 
IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) @@ -163,6 +181,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. + */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) + return true; + return false; } diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 627ea8e2d9159..226a8da2cbe35 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -19,12 +19,30 @@ static inline struct unix_sock *unix_get_socket(struct file *filp) extern spinlock_t unix_gc_lock; extern unsigned int unix_tot_inflight; - -void unix_inflight(struct user_struct *user, struct file *fp); -void unix_notinflight(struct user_struct *user, struct file *fp); +void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver); +void unix_del_edges(struct scm_fp_list *fpl); +void unix_update_edges(struct unix_sock *receiver); +int unix_prepare_fpl(struct scm_fp_list *fpl); +void unix_destroy_fpl(struct scm_fp_list *fpl); void unix_gc(void); void wait_for_unix_gc(struct scm_fp_list *fpl); +struct unix_vertex { + struct list_head edges; + struct list_head entry; + struct list_head scc_entry; + unsigned long out_degree; + unsigned long index; + unsigned long scc_index; +}; + +struct unix_edge { + struct unix_sock *predecessor; + struct unix_sock *successor; + struct list_head vertex_entry; + struct list_head stack_entry; +}; + struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_MOD (256 - 1) @@ -62,12 +80,9 @@ struct unix_sock { struct path path; struct mutex iolock, bindlock; struct sock *peer; - struct list_head link; - unsigned long inflight; + struct sock *listener; + struct unix_vertex *vertex; spinlock_t lock; - unsigned long gc_flags; -#define UNIX_GC_CANDIDATE 0 -#define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; wait_queue_entry_t peer_wake; struct scm_stat scm_stat; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 8701ca5f31eec..5c12761cbc0e2 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -176,6 +176,15 @@ enum { */ HCI_QUIRK_USE_BDADDR_PROPERTY, + /* When this quirk is set, the Bluetooth Device Address provided by + * the 'local-bd-address' fwnode property is incorrectly specified in + * big-endian order. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BDADDR_PROPERTY_BROKEN, + /* When this quirk is set, the duplicate filtering during * scanning is based on Bluetooth devices addresses. To allow * RSSI based updates, restart scanning if needed. 
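[Editor's note: sketch, not part of the patch. The new HCI_QUIRK_BDADDR_PROPERTY_BROKEN above is documented as settable before hci_register_dev() or from the hdev->setup vendor callback; a hypothetical vendor setup hook would use the usual quirk idiom:]

static int example_vendor_setup(struct hci_dev *hdev)
{
	/* Boot firmware passes 'local-bd-address' in big-endian order,
	 * so ask the core to reverse it when applying the property.
	 */
	set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks);

	return 0;
}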
diff --git a/include/net/espintcp.h b/include/net/espintcp.h index 0335bbd76552a..c70efd704b6d5 100644 --- a/include/net/espintcp.h +++ b/include/net/espintcp.h @@ -32,7 +32,7 @@ struct espintcp_ctx { static inline struct espintcp_ctx *espintcp_getctx(const struct sock *sk) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); /* RCU is only needed for diag */ return (__force void *)icsk->icsk_ulp_data; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9ece6e5a3ea8c..9ab49bfeae789 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -2,12 +2,20 @@ #ifndef __NET_GENERIC_NETLINK_H #define __NET_GENERIC_NETLINK_H -#include +#include #include #include +#include #define GENLMSG_DEFAULT_SIZE (NLMSG_DEFAULT_SIZE - GENL_HDRLEN) +/* Non-parallel generic netlink requests are serialized by a global lock. */ +void genl_lock(void); +void genl_unlock(void); + +#define MODULE_ALIAS_GENL_FAMILY(family) \ + MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family) + /* Binding to multicast group requires %CAP_NET_ADMIN */ #define GENL_MCAST_CAP_NET_ADMIN BIT(0) /* Binding to multicast group requires %CAP_SYS_ADMIN */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ccf171f7eb60d..d96c871b0d581 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -147,10 +147,7 @@ struct inet_connection_sock { #define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ #define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */ -static inline struct inet_connection_sock *inet_csk(const struct sock *sk) -{ - return (struct inet_connection_sock *)sk; -} +#define inet_csk(ptr) container_of_const(ptr, struct inet_connection_sock, icsk_inet.sk) static inline void *inet_csk_ca(const struct sock *sk) { diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index f28da08a37b4e..2a536eea9424e 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -111,7 +111,7 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); -void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family); +void inet_twsk_purge(struct inet_hashinfo *hashinfo); static inline struct net *twsk_net(const struct inet_timewait_sock *twsk) diff --git a/include/net/netlink.h b/include/net/netlink.h index c19ff921b661a..1d2bbcc50212d 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -157,7 +157,11 @@ * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes + * nla_for_each_attr_type() loop over all attributes with the + * given type * nla_for_each_nested() loop over the nested attributes + * nla_for_each_nested_type() loop over the nested attributes with + * the given type *========================================================================= */ @@ -2070,6 +2074,18 @@ static inline int nla_total_size_64bit(int payload) nla_ok(pos, rem); \ pos = nla_next(pos, &(rem))) +/** + * nla_for_each_attr_type - iterate over a stream of attributes + * @pos: loop counter, set to current attribute + * @type: required attribute type for @pos + * @head: head of attribute stream + * @len: length of attribute stream + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_attr_type(pos, type, head, len, rem) 
\ + nla_for_each_attr(pos, head, len, rem) \ + if (nla_type(pos) == type) + /** * nla_for_each_nested - iterate over nested attributes * @pos: loop counter, set to current attribute @@ -2079,6 +2095,17 @@ static inline int nla_total_size_64bit(int payload) #define nla_for_each_nested(pos, nla, rem) \ nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem) +/** + * nla_for_each_nested_type - iterate over nested attributes + * @pos: loop counter, set to current attribute + * @type: required attribute type for @pos + * @nla: attribute containing the nested attributes + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_nested_type(pos, type, nla, rem) \ + nla_for_each_nested(pos, nla, rem) \ + if (nla_type(pos) == type) + /** * nla_is_last - Test if attribute is last in stream * @nla: attribute to test diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a4ee43f493bbf..41297bd38dff7 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -74,6 +74,15 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block) return block && block->index; } +#ifdef CONFIG_NET_CLS_ACT +DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key); + +static inline bool tcf_block_bypass_sw(struct tcf_block *block) +{ + return block && block->bypass_wanted; +} +#endif + static inline struct Qdisc *tcf_block_q(struct tcf_block *block) { WARN_ON(tcf_block_shared(block)); diff --git a/include/net/rps.h b/include/net/rps.h index 7660243e905b9..a93401d23d66e 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -122,4 +122,32 @@ static inline void sock_rps_record_flow(const struct sock *sk) #endif } +static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd) +{ +#ifdef CONFIG_RPS + return ++sd->input_queue_tail; +#else + return 0; +#endif +} + +static inline void rps_input_queue_tail_save(u32 *dest, u32 tail) +{ +#ifdef CONFIG_RPS + WRITE_ONCE(*dest, tail); +#endif +} + +static inline void rps_input_queue_head_add(struct softnet_data *sd, int val) +{ +#ifdef CONFIG_RPS + WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val); +#endif +} + +static inline void rps_input_queue_head_incr(struct softnet_data *sd) +{ + rps_input_queue_head_add(sd, 1); +} + #endif /* _NET_RPS_H */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index cefe0c4bdae34..76db6be160831 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -422,6 +422,7 @@ struct tcf_proto { */ spinlock_t lock; bool deleting; + bool counted; refcount_t refcnt; struct rcu_head rcu; struct hlist_node destroy_ht_node; @@ -471,6 +472,9 @@ struct tcf_block { struct flow_block flow_block; struct list_head owner_list; bool keep_dst; + bool bypass_wanted; + atomic_t filtercnt; /* Number of filters */ + atomic_t skipswcnt; /* Number of skip_sw filters */ atomic_t offloadcnt; /* Number of offloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. 
*/ diff --git a/include/net/scm.h b/include/net/scm.h index 92276a2c55436..bbc5527809d1d 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -23,10 +23,19 @@ struct scm_creds { kgid_t gid; }; +#ifdef CONFIG_UNIX +struct unix_edge; +#endif + struct scm_fp_list { short count; short count_unix; short max; +#ifdef CONFIG_UNIX + bool inflight; + struct list_head vertices; + struct unix_edge *edges; +#endif struct user_struct *user; struct file *fp[SCM_MAX_FD]; }; diff --git a/include/net/smc.h b/include/net/smc.h index c9dcb30e3fd98..10684d0a33df3 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -26,9 +26,6 @@ struct smc_hashinfo { struct hlist_head ht; }; -int smc_hash_sk(struct sock *sk); -void smc_unhash_sk(struct sock *sk); - /* SMCD/ISM device driver interface */ struct smcd_dmb { u64 dmb_tok; diff --git a/include/net/sock.h b/include/net/sock.h index f57bfd8a2ad2d..2253eefe28488 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2513,6 +2513,12 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band) } } +static inline void sk_wake_async_rcu(const struct sock *sk, int how, int band) +{ + if (unlikely(sock_flag(sk, SOCK_FASYNC))) + sock_wake_async(rcu_dereference(sk->sk_wq), how, band); +} + /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might * need sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak. * Note: for send buffers, TCP works better if we can build two skbs at diff --git a/include/net/tcp.h b/include/net/tcp.h index 6ae35199d3b3c..9ab5b37e9d532 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -353,7 +353,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); -void tcp_twsk_purge(struct list_head *net_exit_list, int family); +void tcp_twsk_purge(struct list_head *net_exit_list); ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); @@ -742,7 +742,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu); int tcp_mss_to_mtu(struct sock *sk, int mss); void tcp_mtup_init(struct sock *sk); -static inline void tcp_bound_rto(const struct sock *sk) +static inline void tcp_bound_rto(struct sock *sk) { if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) inet_csk(sk)->icsk_rto = TCP_RTO_MAX; diff --git a/include/net/tls.h b/include/net/tls.h index 340ad43971e47..400f4857dc117 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -361,7 +361,7 @@ static inline bool tls_is_skb_tx_device_offloaded(const struct sk_buff *skb) static inline struct tls_context *tls_get_ctx(const struct sock *sk) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); /* Use RCU on icsk_ulp_data only for sock diag code, * TLS data path doesn't need rcu_dereference(). 
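[Editor's note: sketch, not part of the patch. The sk_wake_async_rcu() helper added to sock.h above is meant for callers that already run inside an RCU read-side critical section, as the atm/common.c conversion later in this series illustrates; the calling pattern is:]

static void example_write_space(struct sock *sk)
{
	rcu_read_lock();

	/* Already inside an RCU read-side section, so use the _rcu
	 * variant rather than nesting another rcu_read_lock() via
	 * plain sk_wake_async().
	 */
	sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);

	rcu_read_unlock();
}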
diff --git a/include/trace/events/net_probe_common.h b/include/trace/events/net_probe_common.h index b1f9a4d3ee136..5e33f91bdea37 100644 --- a/include/trace/events/net_probe_common.h +++ b/include/trace/events/net_probe_common.h @@ -70,4 +70,44 @@ TP_STORE_V4MAPPED(__entry, saddr, daddr) #endif +#define TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb, protoh) \ + do { \ + struct sockaddr_in *v4 = (void *)__entry->saddr; \ + \ + v4->sin_family = AF_INET; \ + v4->sin_port = protoh->source; \ + v4->sin_addr.s_addr = ip_hdr(skb)->saddr; \ + v4 = (void *)__entry->daddr; \ + v4->sin_family = AF_INET; \ + v4->sin_port = protoh->dest; \ + v4->sin_addr.s_addr = ip_hdr(skb)->daddr; \ + } while (0) + +#if IS_ENABLED(CONFIG_IPV6) + +#define TP_STORE_ADDR_PORTS_SKB(__entry, skb, protoh) \ + do { \ + const struct iphdr *iph = ip_hdr(skb); \ + \ + if (iph->version == 6) { \ + struct sockaddr_in6 *v6 = (void *)__entry->saddr; \ + \ + v6->sin6_family = AF_INET6; \ + v6->sin6_port = protoh->source; \ + v6->sin6_addr = ipv6_hdr(skb)->saddr; \ + v6 = (void *)__entry->daddr; \ + v6->sin6_family = AF_INET6; \ + v6->sin6_port = protoh->dest; \ + v6->sin6_addr = ipv6_hdr(skb)->daddr; \ + } else \ + TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb, protoh); \ + } while (0) + +#else + +#define TP_STORE_ADDR_PORTS_SKB(__entry, skb, protoh) \ + TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb, protoh) + +#endif + #endif diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 3c08a0846c47d..1db95175c1e52 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -273,48 +273,6 @@ TRACE_EVENT(tcp_probe, __entry->skbaddr, __entry->skaddr) ); -#define TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb) \ - do { \ - const struct tcphdr *th = (const struct tcphdr *)skb->data; \ - struct sockaddr_in *v4 = (void *)__entry->saddr; \ - \ - v4->sin_family = AF_INET; \ - v4->sin_port = th->source; \ - v4->sin_addr.s_addr = ip_hdr(skb)->saddr; \ - v4 = (void *)__entry->daddr; \ - v4->sin_family = AF_INET; \ - v4->sin_port = th->dest; \ - v4->sin_addr.s_addr = ip_hdr(skb)->daddr; \ - } while (0) - -#if IS_ENABLED(CONFIG_IPV6) - -#define TP_STORE_ADDR_PORTS_SKB(__entry, skb) \ - do { \ - const struct iphdr *iph = ip_hdr(skb); \ - \ - if (iph->version == 6) { \ - const struct tcphdr *th = (const struct tcphdr *)skb->data; \ - struct sockaddr_in6 *v6 = (void *)__entry->saddr; \ - \ - v6->sin6_family = AF_INET6; \ - v6->sin6_port = th->source; \ - v6->sin6_addr = ipv6_hdr(skb)->saddr; \ - v6 = (void *)__entry->daddr; \ - v6->sin6_family = AF_INET6; \ - v6->sin6_port = th->dest; \ - v6->sin6_addr = ipv6_hdr(skb)->daddr; \ - } else \ - TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb); \ - } while (0) - -#else - -#define TP_STORE_ADDR_PORTS_SKB(__entry, skb) \ - TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb) - -#endif - /* * tcp event with only skb */ @@ -331,12 +289,13 @@ DECLARE_EVENT_CLASS(tcp_event_skb, ), TP_fast_assign( + const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->skbaddr = skb; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); - TP_STORE_ADDR_PORTS_SKB(__entry, skb); + TP_STORE_ADDR_PORTS_SKB(__entry, skb, th); ), TP_printk("skbaddr=%p src=%pISpc dest=%pISpc", diff --git a/include/trace/events/udp.h b/include/trace/events/udp.h index 336fe272889f5..62bebe2a6eceb 100644 --- a/include/trace/events/udp.h +++ b/include/trace/events/udp.h @@ -7,24 +7,43 @@ #include #include +#include TRACE_EVENT(udp_fail_queue_rcv_skb, - TP_PROTO(int rc, struct sock *sk), + 
TP_PROTO(int rc, struct sock *sk, struct sk_buff *skb), - TP_ARGS(rc, sk), + TP_ARGS(rc, sk, skb), TP_STRUCT__entry( __field(int, rc) - __field(__u16, lport) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( + const struct udphdr *uh = (const struct udphdr *)udp_hdr(skb); + __entry->rc = rc; - __entry->lport = inet_sk(sk)->inet_num; + + /* for filtering use */ + __entry->sport = ntohs(uh->source); + __entry->dport = ntohs(uh->dest); + __entry->family = sk->sk_family; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + + TP_STORE_ADDR_PORTS_SKB(__entry, skb, uh); ), - TP_printk("rc=%d port=%hu", __entry->rc, __entry->lport) + TP_printk("rc=%d family=%s src=%pISpc dest=%pISpc", __entry->rc, + show_family_name(__entry->family), + __entry->saddr, __entry->daddr) ); #endif /* _TRACE_UDP_H */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 3f89074aa06c6..f7cd446b2a839 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -882,6 +882,10 @@ enum { ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ + ETHTOOL_A_MODULE_MAX_POWER_SET, /* u32 */ + ETHTOOL_A_MODULE_MIN_POWER_ALLOWED, /* u32 */ + ETHTOOL_A_MODULE_MAX_POWER_ALLOWED, /* u32 */ + ETHTOOL_A_MODULE_MAX_POWER_RESET, /* u8 */ /* add new constants above here */ __ETHTOOL_A_MODULE_CNT, diff --git a/include/uapi/linux/if_team.h b/include/uapi/linux/if_team.h index 13c61fecb78b8..a5c06243a4355 100644 --- a/include/uapi/linux/if_team.h +++ b/include/uapi/linux/if_team.h @@ -1,108 +1,78 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -/* - * include/linux/if_team.h - Network team device driver header - * Copyright (c) 2011 Jiri Pirko - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/team.yaml */ +/* YNL-GEN uapi header */ -#ifndef _UAPI_LINUX_IF_TEAM_H_ -#define _UAPI_LINUX_IF_TEAM_H_ +#ifndef _UAPI_LINUX_IF_TEAM_H +#define _UAPI_LINUX_IF_TEAM_H +#define TEAM_GENL_NAME "team" +#define TEAM_GENL_VERSION 1 -#define TEAM_STRING_MAX_LEN 32 - -/********************************** - * NETLINK_GENERIC netlink family. - **********************************/ - -enum { - TEAM_CMD_NOOP, - TEAM_CMD_OPTIONS_SET, - TEAM_CMD_OPTIONS_GET, - TEAM_CMD_PORT_LIST_GET, - - __TEAM_CMD_MAX, - TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1), -}; +#define TEAM_STRING_MAX_LEN 32 +#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event" enum { TEAM_ATTR_UNSPEC, - TEAM_ATTR_TEAM_IFINDEX, /* u32 */ - TEAM_ATTR_LIST_OPTION, /* nest */ - TEAM_ATTR_LIST_PORT, /* nest */ + TEAM_ATTR_TEAM_IFINDEX, + TEAM_ATTR_LIST_OPTION, + TEAM_ATTR_LIST_PORT, __TEAM_ATTR_MAX, - TEAM_ATTR_MAX = __TEAM_ATTR_MAX - 1, + TEAM_ATTR_MAX = (__TEAM_ATTR_MAX - 1) }; -/* Nested layout of get/set msg: - * - * [TEAM_ATTR_LIST_OPTION] - * [TEAM_ATTR_ITEM_OPTION] - * [TEAM_ATTR_OPTION_*], ... - * [TEAM_ATTR_ITEM_OPTION] - * [TEAM_ATTR_OPTION_*], ... 
- * ... - * [TEAM_ATTR_LIST_PORT] - * [TEAM_ATTR_ITEM_PORT] - * [TEAM_ATTR_PORT_*], ... - * [TEAM_ATTR_ITEM_PORT] - * [TEAM_ATTR_PORT_*], ... - * ... - */ - enum { TEAM_ATTR_ITEM_OPTION_UNSPEC, - TEAM_ATTR_ITEM_OPTION, /* nest */ + TEAM_ATTR_ITEM_OPTION, __TEAM_ATTR_ITEM_OPTION_MAX, - TEAM_ATTR_ITEM_OPTION_MAX = __TEAM_ATTR_ITEM_OPTION_MAX - 1, + TEAM_ATTR_ITEM_OPTION_MAX = (__TEAM_ATTR_ITEM_OPTION_MAX - 1) }; enum { TEAM_ATTR_OPTION_UNSPEC, - TEAM_ATTR_OPTION_NAME, /* string */ - TEAM_ATTR_OPTION_CHANGED, /* flag */ - TEAM_ATTR_OPTION_TYPE, /* u8 */ - TEAM_ATTR_OPTION_DATA, /* dynamic */ - TEAM_ATTR_OPTION_REMOVED, /* flag */ - TEAM_ATTR_OPTION_PORT_IFINDEX, /* u32 */ /* for per-port options */ - TEAM_ATTR_OPTION_ARRAY_INDEX, /* u32 */ /* for array options */ + TEAM_ATTR_OPTION_NAME, + TEAM_ATTR_OPTION_CHANGED, + TEAM_ATTR_OPTION_TYPE, + TEAM_ATTR_OPTION_DATA, + TEAM_ATTR_OPTION_REMOVED, + TEAM_ATTR_OPTION_PORT_IFINDEX, + TEAM_ATTR_OPTION_ARRAY_INDEX, __TEAM_ATTR_OPTION_MAX, - TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1, + TEAM_ATTR_OPTION_MAX = (__TEAM_ATTR_OPTION_MAX - 1) }; enum { TEAM_ATTR_ITEM_PORT_UNSPEC, - TEAM_ATTR_ITEM_PORT, /* nest */ + TEAM_ATTR_ITEM_PORT, __TEAM_ATTR_ITEM_PORT_MAX, - TEAM_ATTR_ITEM_PORT_MAX = __TEAM_ATTR_ITEM_PORT_MAX - 1, + TEAM_ATTR_ITEM_PORT_MAX = (__TEAM_ATTR_ITEM_PORT_MAX - 1) }; enum { TEAM_ATTR_PORT_UNSPEC, - TEAM_ATTR_PORT_IFINDEX, /* u32 */ - TEAM_ATTR_PORT_CHANGED, /* flag */ - TEAM_ATTR_PORT_LINKUP, /* flag */ - TEAM_ATTR_PORT_SPEED, /* u32 */ - TEAM_ATTR_PORT_DUPLEX, /* u8 */ - TEAM_ATTR_PORT_REMOVED, /* flag */ + TEAM_ATTR_PORT_IFINDEX, + TEAM_ATTR_PORT_CHANGED, + TEAM_ATTR_PORT_LINKUP, + TEAM_ATTR_PORT_SPEED, + TEAM_ATTR_PORT_DUPLEX, + TEAM_ATTR_PORT_REMOVED, __TEAM_ATTR_PORT_MAX, - TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1, + TEAM_ATTR_PORT_MAX = (__TEAM_ATTR_PORT_MAX - 1) }; -/* - * NETLINK_GENERIC related info - */ -#define TEAM_GENL_NAME "team" -#define TEAM_GENL_VERSION 0x1 -#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event" +enum { + TEAM_CMD_NOOP, + TEAM_CMD_OPTIONS_SET, + TEAM_CMD_OPTIONS_GET, + TEAM_CMD_PORT_LIST_GET, + + __TEAM_CMD_MAX, + TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1) +}; -#endif /* _UAPI_LINUX_IF_TEAM_H_ */ +#endif /* _UAPI_LINUX_IF_TEAM_H */ diff --git a/kernel/configs/x86_debug.config b/kernel/configs/x86_debug.config index 35f48671b8d5c..780788ef15457 100644 --- a/kernel/configs/x86_debug.config +++ b/kernel/configs/x86_debug.config @@ -1,5 +1,4 @@ # Help: Debugging options for tip tree testing -CONFIG_X86_DEBUG_FPU=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_VMACACHE=y @@ -10,9 +9,7 @@ CONFIG_DEBUG_PAGEALLOC=y CONFIG_SLUB_DEBUG_ON=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 -CONFIG_GCOV_KERNEL=y CONFIG_LOCKDEP=y CONFIG_PROVE_LOCKING=y -CONFIG_SCHEDSTATS=y -CONFIG_NOINSTR_VALIDATION=y -CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_NET_NS_REFCNT_TRACKER=y +CONFIG_DEBUG_NET=y diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index a3b68243fd4b1..cf5219df7903c 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -117,17 +117,15 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], return err; } if (data[IFLA_VLAN_INGRESS_QOS]) { - nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { - if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) - continue; + nla_for_each_nested_type(attr, IFLA_VLAN_QOS_MAPPING, + data[IFLA_VLAN_INGRESS_QOS], rem) { m = nla_data(attr); vlan_dev_set_ingress_priority(dev, m->to, m->from); } } if 
(data[IFLA_VLAN_EGRESS_QOS]) { - nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { - if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) - continue; + nla_for_each_nested_type(attr, IFLA_VLAN_QOS_MAPPING, + data[IFLA_VLAN_EGRESS_QOS], rem) { m = nla_data(attr); err = vlan_dev_set_egress_priority(dev, m->from, m->to); if (err) diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index d945b7c0176db..7aebfe9032425 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -40,7 +40,6 @@ static struct ctl_table atalk_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { }, }; static struct ctl_table_header *atalk_table_header; diff --git a/net/atm/common.c b/net/atm/common.c index 2a1ec014e901d..9b75699992ff9 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -116,7 +116,7 @@ static void vcc_write_space(struct sock *sk) if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index c5462486dbca1..282ec581c0720 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev) spin_lock_bh(&ax25_dev_lock); #ifdef CONFIG_AX25_DAMA_SLAVE - ax25_ds_del_timer(ax25_dev); + timer_shutdown_sync(&ax25_dev->dama.slave_timer); #endif /* diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index db66e11e7fe8b..e55be8817a1e4 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -141,8 +141,6 @@ static const struct ctl_table ax25_param_table[] = { .extra2 = &max_ds_timeout }, #endif - - { } /* that's all, folks! 
*/ }; int ax25_register_dev_sysctl(ax25_dev *ax25_dev) @@ -155,7 +153,7 @@ int ax25_register_dev_sysctl(ax25_dev *ax25_dev) if (!table) return -ENOMEM; - for (k = 0; k < AX25_MAX_VALUES; k++) + for (k = 0; k < AX25_MAX_VALUES && k < ARRAY_SIZE(ax25_param_table); k++) table[k].data = &ax25_dev->values[k]; snprintf(path, sizeof(path), "net/ax25/%s", ax25_dev->dev->name); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 75119f1ffcccf..8e0f44c71696f 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -38,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 0954757f0b8b8..9362cd9d6f3d3 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 1690ae57a09db..a7028d38c1f5c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2874,7 +2874,7 @@ static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) cancel_delayed_work_sync(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); - hci_cmd_sync_cancel_sync(hdev, -err); + hci_cmd_sync_cancel_sync(hdev, err); } /* Suspend HCI device */ @@ -2894,7 +2894,7 @@ int hci_suspend_dev(struct hci_dev *hdev) return 0; /* Cancel potentially blocking sync operation before suspend */ - hci_cancel_cmd_sync(hdev, -EHOSTDOWN); + hci_cancel_cmd_sync(hdev, EHOSTDOWN); hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); @@ -4210,7 +4210,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) err = hci_send_frame(hdev, skb); if (err < 0) { - hci_cmd_sync_cancel_sync(hdev, err); + hci_cmd_sync_cancel_sync(hdev, -err); return; } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 233453807b509..ce3ff2fa72e58 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -218,10 +218,12 @@ static int conn_info_min_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val > hdev->conn_info_max_age) + hci_dev_lock(hdev); + if (val == 0 || val > hdev->conn_info_max_age) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->conn_info_min_age = val; hci_dev_unlock(hdev); @@ -246,10 +248,12 @@ static int conn_info_max_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val < hdev->conn_info_min_age) + hci_dev_lock(hdev); + if (val == 0 || val < hdev->conn_info_min_age) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->conn_info_max_age = val; hci_dev_unlock(hdev); @@ -567,10 +571,12 @@ static int sniff_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) + hci_dev_lock(hdev); + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->sniff_min_interval = val; hci_dev_unlock(hdev); @@ -595,10 +601,12 @@ static int sniff_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) + hci_dev_lock(hdev); + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->sniff_max_interval = val; hci_dev_unlock(hdev); @@ -850,10 +858,12 @@ static int 
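
The hci_debugfs.c conversions that follow all apply one pattern: the range check moves inside hci_dev_lock(), so validating against the paired limit and storing the new value happen in a single critical section. Checked before the lock, a concurrent write to the other bound could leave min above max. A userspace analog of the pattern, with hypothetical names:

	#include <pthread.h>

	struct dev_limits {
		pthread_mutex_t lock;
		unsigned int min_age;
		unsigned int max_age;
	};

	/* Validate and store under one critical section, as the patched
	 * setters do; checking before taking the lock races with updates
	 * to the paired bound.
	 */
	static int set_min_age(struct dev_limits *d, unsigned int val)
	{
		pthread_mutex_lock(&d->lock);
		if (val == 0 || val > d->max_age) {
			pthread_mutex_unlock(&d->lock);
			return -1;	/* -EINVAL in the kernel code */
		}
		d->min_age = val;
		pthread_mutex_unlock(&d->lock);
		return 0;
	}

	int main(void)
	{
		struct dev_limits d = {
			.lock = PTHREAD_MUTEX_INITIALIZER,
			.min_age = 1000,
			.max_age = 3000,
		};

		return set_min_age(&d, 4000) == -1 ? 0 : 1; /* above max: rejected */
	}
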
conn_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) + hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_conn_min_interval = val; hci_dev_unlock(hdev); @@ -878,10 +888,12 @@ static int conn_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) + hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_conn_max_interval = val; hci_dev_unlock(hdev); @@ -990,10 +1002,12 @@ static int adv_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) + hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_adv_min_interval = val; hci_dev_unlock(hdev); @@ -1018,10 +1032,12 @@ static int adv_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) + hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_adv_max_interval = val; hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4ae2248240121..a8b8cfebe0180 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3208,6 +3208,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, if (test_bit(HCI_ENCRYPT, &hdev->flags)) set_bit(HCI_CONN_ENCRYPT, &conn->flags); + /* "Link key request" completed ahead of "connect request" completes */ + if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && + ev->link_type == ACL_LINK) { + struct link_key *key; + struct hci_cp_read_enc_key_size cp; + + key = hci_find_link_key(hdev, &ev->bdaddr); + if (key) { + set_bit(HCI_CONN_ENCRYPT, &conn->flags); + + if (!(hdev->commands[20] & 0x10)) { + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } else { + cp.handle = cpu_to_le16(conn->handle); + if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, + sizeof(cp), &cp)) { + bt_dev_err(hdev, "sending read key size failed"); + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } + } + + hci_encrypt_cfm(conn, ev->status); + } + } + /* Get remote features */ if (conn->type == ACL_LINK) { struct hci_cp_read_remote_features cp; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index f6b662369322b..8fe02921adf15 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -617,7 +617,10 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) bt_dev_dbg(hdev, "err 0x%2.2x", err); if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = err; + /* req_result is __u32 so error must be positive to be properly + * propagated. + */ + hdev->req_result = err < 0 ? 
-err : err; hdev->req_status = HCI_REQ_CANCELED; wake_up_interruptible(&hdev->req_wait_q); @@ -3416,7 +3419,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) return; - bacpy(&hdev->public_addr, &ba); + if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) + baswap(&hdev->public_addr, &ba); + else + bacpy(&hdev->public_addr, &ba); } struct hci_init_stage { diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 35e10c5a766d5..d31f57ffe9859 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1219,7 +1219,6 @@ static struct ctl_table brnf_table[] = { .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, - { } }; static inline void br_netfilter_sysctl_default(struct brnf_net *brnf) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 6c4d90b24d467..bc01b3aa6b0fa 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -496,27 +496,22 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) if (!bpf_capable()) return ERR_PTR(-EPERM); - nla_for_each_nested(nla, nla_stgs, rem) { - if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) { - if (nla_len(nla) != sizeof(u32)) - return ERR_PTR(-EINVAL); - nr_maps++; - } + nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD, + nla_stgs, rem) { + if (nla_len(nla) != sizeof(u32)) + return ERR_PTR(-EINVAL); + nr_maps++; } diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL); if (!diag) return ERR_PTR(-ENOMEM); - nla_for_each_nested(nla, nla_stgs, rem) { - struct bpf_map *map; - int map_fd; - - if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD) - continue; + nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD, + nla_stgs, rem) { + int map_fd = nla_get_u32(nla); + struct bpf_map *map = bpf_map_get(map_fd); - map_fd = nla_get_u32(nla); - map = bpf_map_get(map_fd); if (IS_ERR(map)) { err = PTR_ERR(map); goto err_free; diff --git a/net/core/dev.c b/net/core/dev.c index 5d36a634f468f..5a5b5683358a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -455,7 +455,7 @@ EXPORT_PER_CPU_SYMBOL(softnet_data); * PP consumers must pay attention to run APIs in the appropriate context * (e.g. NAPI context). 
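
The hci_sync.c comment above spells out the sign convention this series settles on: req_result is __u32, so the cancel reason is stored positive and negated again wherever an int errno is expected (hci_send_cmd_sync passes -err for the same reason). A standalone sketch of the convention:

	#include <stdint.h>
	#include <stdio.h>

	struct request {
		uint32_t result;	/* unsigned, like hdev->req_result */
	};

	/* Store the reason positive: a negative errno assigned to a u32
	 * wraps to a huge value and breaks later comparisons.
	 */
	static void cancel_request(struct request *req, int err)
	{
		req->result = err < 0 ? -err : err;
	}

	int main(void)
	{
		struct request req;

		cancel_request(&req, -110);	/* -ETIMEDOUT on Linux */
		printf("stored %u, reported %d\n", req.result, -(int)req.result);
		return 0;
	}
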
*/ -static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool); +static DEFINE_PER_CPU(struct page_pool *, system_page_pool); #ifdef CONFIG_LOCKDEP /* @@ -2083,6 +2083,11 @@ void net_dec_egress_queue(void) EXPORT_SYMBOL_GPL(net_dec_egress_queue); #endif +#ifdef CONFIG_NET_CLS_ACT +DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key); +EXPORT_SYMBOL(tcf_bypass_check_needed_key); +#endif + DEFINE_STATIC_KEY_FALSE(netstamp_needed_key); EXPORT_SYMBOL(netstamp_needed_key); #ifdef CONFIG_JUMP_LABEL @@ -3937,6 +3942,11 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, if (!miniq) return ret; + if (static_branch_unlikely(&tcf_bypass_check_needed_key)) { + if (tcf_block_bypass_sw(miniq->block)) + return ret; + } + tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; tcf_set_drop_reason(skb, *drop_reason); @@ -4518,7 +4528,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, out: #endif rflow->last_qtail = - per_cpu(softnet_data, next_cpu).input_queue_head; + READ_ONCE(per_cpu(softnet_data, next_cpu).input_queue_head); } rflow->cpu = next_cpu; @@ -4600,8 +4610,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, */ if (unlikely(tcpu != next_cpu) && (tcpu >= nr_cpu_ids || !cpu_online(tcpu) || - ((int)(per_cpu(softnet_data, tcpu).input_queue_head - - rflow->last_qtail)) >= 0)) { + ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) - + READ_ONCE(rflow->last_qtail))) >= 0)) { tcpu = next_cpu; rflow = set_rps_cpu(dev, skb, rflow, next_cpu); } @@ -4655,8 +4665,8 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, rflow = &flow_table->flows[flow_id]; cpu = READ_ONCE(rflow->cpu); if (rflow->filter == filter_id && cpu < nr_cpu_ids && - ((int)(per_cpu(softnet_data, cpu).input_queue_head - - rflow->last_qtail) < + ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) - + READ_ONCE(rflow->last_qtail)) < (int)(10 * flow_table->mask))) expire = false; } @@ -4790,37 +4800,45 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, struct softnet_data *sd; unsigned long flags; unsigned int qlen; + int max_backlog; + u32 tail; + + reason = SKB_DROP_REASON_DEV_READY; + if (!netif_running(skb->dev)) + goto bad_dev; - reason = SKB_DROP_REASON_NOT_SPECIFIED; + reason = SKB_DROP_REASON_CPU_BACKLOG; sd = &per_cpu(softnet_data, cpu); + qlen = skb_queue_len_lockless(&sd->input_pkt_queue); + max_backlog = READ_ONCE(net_hotdata.max_backlog); + if (unlikely(qlen > max_backlog)) + goto cpu_backlog_drop; backlog_lock_irq_save(sd, &flags); - if (!netif_running(skb->dev)) - goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); - if (qlen <= READ_ONCE(net_hotdata.max_backlog) && - !skb_flow_limit(skb, qlen)) { - if (qlen) { -enqueue: - __skb_queue_tail(&sd->input_pkt_queue, skb); - input_queue_tail_incr_save(sd, qtail); - backlog_unlock_irq_restore(sd, &flags); - return NET_RX_SUCCESS; + if (qlen <= max_backlog && !skb_flow_limit(skb, qlen)) { + if (!qlen) { + /* Schedule NAPI for backlog device. We can use + * non atomic operation as we own the queue lock. 
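
The reworked enqueue_to_backlog() above now rejects an obviously full queue before taking the backlog lock, using a lockless length read. The idiom tolerates racy reads on purpose: the marked access only prevents load tearing, and the authoritative check is redone under the lock. A userspace analog with C11 atomics:

	#include <stdatomic.h>
	#include <stdbool.h>

	struct backlog {
		atomic_uint qlen;	/* like skb_queue_len_lockless() reads */
		unsigned int max;
	};

	/* Racy by design: a stale read that lets a packet through is
	 * re-checked under the queue lock; a stale read that drops early
	 * only happens under real overload anyway.
	 */
	bool backlog_looks_full(const struct backlog *b)
	{
		return atomic_load_explicit(&b->qlen, memory_order_relaxed) > b->max;
	}
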
+ */ + if (!__test_and_set_bit(NAPI_STATE_SCHED, + &sd->backlog.state)) + napi_schedule_rps(sd); } + __skb_queue_tail(&sd->input_pkt_queue, skb); + tail = rps_input_queue_tail_incr(sd); + backlog_unlock_irq_restore(sd, &flags); - /* Schedule NAPI for backlog device - * We can use non atomic operation since we own the queue lock - */ - if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) - napi_schedule_rps(sd); - goto enqueue; + /* save the tail outside of the critical section */ + rps_input_queue_tail_save(qtail, tail); + return NET_RX_SUCCESS; } - reason = SKB_DROP_REASON_CPU_BACKLOG; -drop: - sd->dropped++; backlog_unlock_irq_restore(sd, &flags); +cpu_backlog_drop: + atomic_inc(&sd->dropped); +bad_dev: dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb_reason(skb, reason); return NET_RX_DROP; @@ -5890,7 +5908,7 @@ static void flush_backlog(struct work_struct *work) if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); dev_kfree_skb_irq(skb); - input_queue_head_incr(sd); + rps_input_queue_head_incr(sd); } } backlog_unlock_irq_enable(sd); @@ -5899,7 +5917,7 @@ static void flush_backlog(struct work_struct *work) if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); kfree_skb(skb); - input_queue_head_incr(sd); + rps_input_queue_head_incr(sd); } } local_bh_enable(); @@ -6027,9 +6045,10 @@ static int process_backlog(struct napi_struct *napi, int quota) rcu_read_lock(); __netif_receive_skb(skb); rcu_read_unlock(); - input_queue_head_incr(sd); - if (++work >= quota) + if (++work >= quota) { + rps_input_queue_head_add(sd, work); return work; + } } @@ -6052,6 +6071,8 @@ static int process_backlog(struct napi_struct *napi, int quota) backlog_unlock_irq_enable(sd); } + if (work) + rps_input_queue_head_add(sd, work); return work; } @@ -11441,11 +11462,11 @@ static int dev_cpu_dead(unsigned int oldcpu) /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { netif_rx(skb); - input_queue_head_incr(oldsd); + rps_input_queue_head_incr(oldsd); } while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx(skb); - input_queue_head_incr(oldsd); + rps_input_queue_head_incr(oldsd); } return 0; diff --git a/net/core/dev.h b/net/core/dev.h index 2bcaf8eee50c1..8572d2c8dc4ad 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -4,11 +4,9 @@ #include #include +#include struct net; -struct net_device; -struct netdev_bpf; -struct netdev_phys_item_id; struct netlink_ext_ack; struct cpumask; @@ -150,4 +148,23 @@ static inline void xdp_do_check_flushed(struct napi_struct *napi) { } #endif struct napi_struct *napi_by_id(unsigned int napi_id); +void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); + +#define XMIT_RECURSION_LIMIT 8 +static inline bool dev_xmit_recursion(void) +{ + return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > + XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + __this_cpu_inc(softnet_data.xmit.recursion); +} + +static inline void dev_xmit_recursion_dec(void) +{ + __this_cpu_dec(softnet_data.xmit.recursion); +} + #endif diff --git a/net/core/gro.c b/net/core/gro.c index ee30d4f0c0387..83f35d99a682c 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -192,8 +192,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) } merge: - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; 
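
The new dev.h helpers above wrap a per-CPU transmit recursion counter: nested dev_queue_xmit() calls (tunnel over tunnel, or a misconfigured device loop) are capped at XMIT_RECURSION_LIMIT instead of eating the stack. A thread-local userspace analog of the guard; the kernel version is per-CPU because xmit runs with bottom halves disabled and cannot migrate:

	#define XMIT_RECURSION_LIMIT	8

	static _Thread_local unsigned int xmit_recursion;

	static int xmit_one(void *pkt)
	{
		(void)pkt;		/* stand-in for the real transmit step */
		return 0;
	}

	/* Bound nested transmits the way dev_xmit_recursion{,_inc,_dec}() do. */
	static int dev_xmit(void *pkt)
	{
		int ret;

		if (xmit_recursion > XMIT_RECURSION_LIMIT)
			return -1;	/* drop rather than recurse further */

		xmit_recursion++;
		ret = xmit_one(pkt);	/* may legitimately re-enter dev_xmit() */
		xmit_recursion--;

		return ret;
	}
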
+ skb->sk = NULL; delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 552719c3bbc3d..b0327402b3e66 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3728,7 +3728,7 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; - struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; + struct ctl_table neigh_vars[NEIGH_VAR_MAX]; } neigh_sysctl_template __read_mostly = { .neigh_vars = { NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), @@ -3779,7 +3779,6 @@ static struct neigh_sysctl_table { .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, - {}, }, }; @@ -3807,8 +3806,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, if (dev) { dev_name_source = dev->name; /* Terminate the table early */ - memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, - sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1; } else { struct neigh_table *tbl = p->tbl; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index a97eceb84e61e..fa6d3969734a6 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -144,7 +144,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x\n", - sd->processed, sd->dropped, sd->time_squeeze, 0, + sd->processed, atomic_read(&sd->dropped), + sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ 0, /* was cpu_collision */ sd->received_rps, flow_limit_count, diff --git a/net/core/page_pool.c b/net/core/page_pool.c index dd364d738c006..4c175091fc0ab 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -690,8 +690,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, page_pool_dma_sync_for_device(pool, page, dma_sync_size); - if (allow_direct && in_softirq() && - page_pool_recycle_in_cache(page, pool)) + if (allow_direct && page_pool_recycle_in_cache(page, pool)) return NULL; /* Page found as candidate for recycling */ @@ -716,9 +715,35 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, return NULL; } +static bool page_pool_napi_local(const struct page_pool *pool) +{ + const struct napi_struct *napi; + u32 cpuid; + + if (unlikely(!in_softirq())) + return false; + + /* Allow direct recycle if we have reasons to believe that we are + * in the same context as the consumer would run, so there's + * no possible race. + * __page_pool_put_page() makes sure we're not in hardirq context + * and interrupts are enabled prior to accessing the cache. 
+ */ + cpuid = smp_processor_id(); + if (READ_ONCE(pool->cpuid) == cpuid) + return true; + + napi = READ_ONCE(pool->p.napi); + + return napi && READ_ONCE(napi->list_owner) == cpuid; +} + void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct) { + if (!allow_direct) + allow_direct = page_pool_napi_local(pool); + page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); if (page && !page_pool_recycle_in_ring(pool, page)) { /* Cache full, fallback to free pages */ @@ -747,8 +772,11 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, int count) { int i, bulk_len = 0; + bool allow_direct; bool in_softirq; + allow_direct = page_pool_napi_local(pool); + for (i = 0; i < count; i++) { struct page *page = virt_to_head_page(data[i]); @@ -756,13 +784,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, if (!page_pool_is_last_ref(page)) continue; - page = __page_pool_put_page(pool, page, -1, false); + page = __page_pool_put_page(pool, page, -1, allow_direct); /* Approved for bulk recycling in ptr_ring cache */ if (page) data[bulk_len++] = page; } - if (unlikely(!bulk_len)) + if (!bulk_len) return; /* Bulk producer into ptr_ring page_pool cache */ @@ -969,7 +997,7 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), static void page_pool_disable_direct_recycling(struct page_pool *pool) { /* Disable direct recycling based on pool->cpuid. - * Paired with READ_ONCE() in napi_pp_put_page(). + * Paired with READ_ONCE() in page_pool_napi_local(). */ WRITE_ONCE(pool->cpuid, -1); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a3d7847ce69d3..283e42f48af68 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5245,15 +5245,14 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { - nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { - if (nla_len(attr) < sizeof(flags)) - return -EINVAL; + nla_for_each_nested_type(attr, IFLA_BRIDGE_FLAGS, br_spec, + rem) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; - have_flags = true; - flags = nla_get_u16(attr); - break; - } + have_flags = true; + flags = nla_get_u16(attr); + break; } } diff --git a/net/core/scm.c b/net/core/scm.c index 9cd4b0a01cd60..5763f33203586 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -89,6 +89,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) fpl->count_unix = 0; fpl->max = SCM_MAX_FD; fpl->user = NULL; +#if IS_ENABLED(CONFIG_UNIX) + fpl->inflight = false; + fpl->edges = NULL; + INIT_LIST_HEAD(&fpl->vertices); +#endif } fpp = &fpl->fp[fpl->count]; @@ -376,8 +381,14 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) if (new_fpl) { for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); + new_fpl->max = new_fpl->count; new_fpl->user = get_uid(fpl->user); +#if IS_ENABLED(CONFIG_UNIX) + new_fpl->inflight = false; + new_fpl->edges = NULL; + INIT_LIST_HEAD(&new_fpl->vertices); +#endif } return new_fpl; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 17617c29be2df..2a5ce6667bbb9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -775,10 +775,9 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, EXPORT_SYMBOL(__netdev_alloc_skb); /** - * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance + * napi_alloc_skb - allocate skbuff for rx in a 
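
page_pool_napi_local() above centralizes a decision the old napi_pp_put_page() made at each call site: direct recycling into the lockless per-CPU cache is allowed only when the code runs on the pool's own CPU or on the CPU that owns the pool's NAPI. The caller-visible consequence, sketched in kernel style with a hypothetical driver helper:

	/* Hypothetical driver helper: allow_direct no longer needs to be
	 * threaded through from the caller. Passing false is always safe;
	 * the pool upgrades to the fast per-CPU path by itself when it
	 * detects it is running in its own NAPI context.
	 */
	static void my_drv_recycle_page(struct page_pool *pool, struct page *page)
	{
		page_pool_put_full_page(pool, page, false);
	}
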
specific NAPI instance * @napi: napi instance this buffer was allocated for * @len: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages * * Allocate a new sk_buff for use in NAPI receive. This buffer will * attempt to allocate the head from a special reserved region used @@ -787,9 +786,9 @@ EXPORT_SYMBOL(__netdev_alloc_skb); * * %NULL is returned if there is no free memory. */ -struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, - gfp_t gfp_mask) +struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) { + gfp_t gfp_mask = GFP_ATOMIC | __GFP_NOWARN; struct napi_alloc_cache *nc; struct sk_buff *skb; bool pfmemalloc; @@ -860,7 +859,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, skb_fail: return skb; } -EXPORT_SYMBOL(__napi_alloc_skb); +EXPORT_SYMBOL(napi_alloc_skb); void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size, unsigned int truesize) @@ -1005,11 +1004,8 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, EXPORT_SYMBOL(skb_cow_data_for_xdp); #if IS_ENABLED(CONFIG_PAGE_POOL) -bool napi_pp_put_page(struct page *page, bool napi_safe) +bool napi_pp_put_page(struct page *page) { - bool allow_direct = false; - struct page_pool *pp; - page = compound_head(page); /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation @@ -1022,39 +1018,18 @@ bool napi_pp_put_page(struct page *page, bool napi_safe) if (unlikely(!is_pp_page(page))) return false; - pp = page->pp; - - /* Allow direct recycle if we have reasons to believe that we are - * in the same context as the consumer would run, so there's - * no possible race. - * __page_pool_put_page() makes sure we're not in hardirq context - * and interrupts are enabled prior to accessing the cache. - */ - if (napi_safe || in_softirq()) { - const struct napi_struct *napi = READ_ONCE(pp->p.napi); - unsigned int cpuid = smp_processor_id(); - - allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid; - allow_direct |= READ_ONCE(pp->cpuid) == cpuid; - } - - /* Driver set this to memory recycling info. Reset it on recycle. - * This will *not* work for NIC using a split-page memory model. - * The page will be returned to the pool here regardless of the - * 'flipped' fragment being in use or not. 
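
With __napi_alloc_skb() renamed to napi_alloc_skb(), the gfp_mask parameter disappears: NAPI receive context only ever allocated with GFP_ATOMIC | __GFP_NOWARN, so the mask is now fixed inside the function. Call sites change mechanically:

	/* before */
	skb = __napi_alloc_skb(napi, len, GFP_ATOMIC | __GFP_NOWARN);

	/* after: identical behaviour, one argument fewer */
	skb = napi_alloc_skb(napi, len);
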
- */ - page_pool_put_full_page(pp, page, allow_direct); + page_pool_put_full_page(page->pp, page, false); return true; } EXPORT_SYMBOL(napi_pp_put_page); #endif -static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe) +static bool skb_pp_recycle(struct sk_buff *skb, void *data) { if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) return false; - return napi_pp_put_page(virt_to_page(data), napi_safe); + return napi_pp_put_page(virt_to_page(data)); } /** @@ -1096,12 +1071,12 @@ static void skb_kfree_head(void *head, unsigned int end_offset) kfree(head); } -static void skb_free_head(struct sk_buff *skb, bool napi_safe) +static void skb_free_head(struct sk_buff *skb) { unsigned char *head = skb->head; if (skb->head_frag) { - if (skb_pp_recycle(skb, head, napi_safe)) + if (skb_pp_recycle(skb, head)) return; skb_free_frag(head); } else { @@ -1109,8 +1084,7 @@ static void skb_free_head(struct sk_buff *skb, bool napi_safe) } } -static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason, - bool napi_safe) +static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason) { struct skb_shared_info *shinfo = skb_shinfo(skb); int i; @@ -1127,13 +1101,13 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason, } for (i = 0; i < shinfo->nr_frags; i++) - napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe); + napi_frag_unref(&shinfo->frags[i], skb->pp_recycle); free_head: if (shinfo->frag_list) kfree_skb_list_reason(shinfo->frag_list, reason); - skb_free_head(skb, napi_safe); + skb_free_head(skb); exit: /* When we clone an SKB we copy the reycling bit. The pp_recycle * bit is only set on the head though, so in order to avoid races @@ -1194,12 +1168,11 @@ void skb_release_head_state(struct sk_buff *skb) } /* Free everything but the sk_buff shell. 
*/ -static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason, - bool napi_safe) +static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason) { skb_release_head_state(skb); if (likely(skb->head)) - skb_release_data(skb, reason, napi_safe); + skb_release_data(skb, reason); } /** @@ -1213,7 +1186,7 @@ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason, void __kfree_skb(struct sk_buff *skb) { - skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false); + skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED); kfree_skbmem(skb); } EXPORT_SYMBOL(__kfree_skb); @@ -1270,7 +1243,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb, return; } - skb_release_all(skb, reason, false); + skb_release_all(skb, reason); sa->skb_array[sa->skb_count++] = skb; if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) { @@ -1444,7 +1417,7 @@ EXPORT_SYMBOL(consume_skb); void __consume_stateless_skb(struct sk_buff *skb) { trace_consume_skb(skb, __builtin_return_address(0)); - skb_release_data(skb, SKB_CONSUMED, false); + skb_release_data(skb, SKB_CONSUMED); kfree_skbmem(skb); } @@ -1471,7 +1444,7 @@ static void napi_skb_cache_put(struct sk_buff *skb) void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason) { - skb_release_all(skb, reason, true); + skb_release_all(skb, reason); napi_skb_cache_put(skb); } @@ -1509,7 +1482,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } - skb_release_all(skb, SKB_CONSUMED, !!budget); + skb_release_all(skb, SKB_CONSUMED); napi_skb_cache_put(skb); } EXPORT_SYMBOL(napi_consume_skb); @@ -1640,7 +1613,7 @@ EXPORT_SYMBOL_GPL(alloc_skb_for_msg); */ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) { - skb_release_all(dst, SKB_CONSUMED, false); + skb_release_all(dst, SKB_CONSUMED); return __skb_clone(dst, src); } EXPORT_SYMBOL_GPL(skb_morph); @@ -2272,9 +2245,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); - skb_release_data(skb, SKB_CONSUMED, false); + skb_release_data(skb, SKB_CONSUMED); } else { - skb_free_head(skb, false); + skb_free_head(skb); } off = (data + nhead) - skb->head; @@ -6575,12 +6548,12 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); - skb_release_data(skb, SKB_CONSUMED, false); + skb_release_data(skb, SKB_CONSUMED); } else { /* we can reuse existing recount- all we did was * relocate values */ - skb_free_head(skb, false); + skb_free_head(skb); } skb->head = data; @@ -6715,7 +6688,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb_kfree_head(data, size); return -ENOMEM; } - skb_release_data(skb, SKB_CONSUMED, false); + skb_release_data(skb, SKB_CONSUMED); skb->head = data; skb->head_frag = 0; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 4d75ef9d24bfa..67c4c01c52359 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -552,7 +552,9 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, msg->skb = skb; sk_psock_queue_msg(psock, msg); + read_lock_bh(&sk->sk_callback_lock); sk_psock_data_ready(sk, psock); + read_unlock_bh(&sk->sk_callback_lock); return copied; } diff --git a/net/core/sock.c b/net/core/sock.c index 0963689a59506..5ed411231fc7b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3338,7 +3338,7 @@ static void sock_def_error_report(struct sock *sk) wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) 
wake_up_interruptible_poll(&wq->wait, EPOLLERR); - sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); + sk_wake_async_rcu(sk, SOCK_WAKE_IO, POLL_ERR); rcu_read_unlock(); } @@ -3353,7 +3353,7 @@ void sock_def_readable(struct sock *sk) if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | EPOLLRDNORM | EPOLLRDBAND); - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); } @@ -3373,7 +3373,7 @@ static void sock_def_write_space(struct sock *sk) EPOLLWRNORM | EPOLLWRBAND); /* Should agree with poll, otherwise some programs break */ - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); @@ -3398,7 +3398,7 @@ static void sock_def_write_space_wfree(struct sock *sk) EPOLLWRNORM | EPOLLWRBAND); /* Should agree with poll, otherwise some programs break */ - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); } } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 6973dda3abda6..46f5143e86be1 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -660,7 +660,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, - { } }; static struct ctl_table netns_core_table[] = { @@ -697,7 +696,6 @@ static struct ctl_table netns_core_table[] = { .extra2 = SYSCTL_ONE, .proc_handler = proc_dou8vec_minmax, }, - { } }; static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) @@ -715,7 +713,8 @@ __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup); static __net_init int sysctl_core_net_init(struct net *net) { - struct ctl_table *tbl, *tmp; + struct ctl_table *tbl; + size_t table_size = ARRAY_SIZE(netns_core_table); tbl = netns_core_table; if (!net_eq(net, &init_net)) { @@ -723,8 +722,8 @@ static __net_init int sysctl_core_net_init(struct net *net) if (tbl == NULL) goto err_dup; - for (tmp = tbl; tmp->procname; tmp++) - tmp->data += (char *)net - (char *)&init_net; + for (int i = 0; i < table_size; ++i) + (tbl + i)->data += (char *)net - (char *)&init_net; } net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 44b033fe1ef68..9fc9cea4c251b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1039,7 +1039,7 @@ static void __net_exit dccp_v4_exit_net(struct net *net) static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list) { - inet_twsk_purge(&dccp_hashinfo, AF_INET); + inet_twsk_purge(&dccp_hashinfo); } static struct pernet_operations dccp_v4_ops = { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ded07e09f8135..c8ca703dc331a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1119,15 +1119,9 @@ static void __net_exit dccp_v6_exit_net(struct net *net) inet_ctl_sock_destroy(pn->v6_ctl_sk); } -static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list) -{ - inet_twsk_purge(&dccp_hashinfo, AF_INET6); -} - static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, - .exit_batch = dccp_v6_exit_batch, .id = &dccp_v6_pernet_id, .size = sizeof(struct dccp_v6_pernet), }; diff --git a/net/dccp/output.c b/net/dccp/output.c index fd2eb148d24de..5c2e24f3c39b7 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -204,7 +204,7 @@ void dccp_write_space(struct sock *sk) wake_up_interruptible(&wq->wait); /* Should agree with poll, otherwise some programs break 
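
The sk_wake_async() to sk_wake_async_rcu() conversions in this file and elsewhere (atm, dccp, udp) all target call sites that already run under rcu_read_lock(). The _rcu variant can therefore skip taking the RCU read lock a second time; its likely shape:

	/* Sketch of the variant the series switches to; callers guarantee
	 * they are already inside an RCU read-side critical section.
	 */
	static inline void sk_wake_async_rcu(const struct sock *sk, int how, int band)
	{
		if (unlikely(sock_flag(sk, SOCK_FASYNC)))
			sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
	}
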
*/ if (sock_writeable(sk)) - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); } diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index ee8d4f5afa722..3fc474d6e57dd 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -90,8 +90,6 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, - - { } }; static struct ctl_table_header *dccp_table_header; diff --git a/net/devlink/core.c b/net/devlink/core.c index 7f0b093208d75..f49cd83f1955f 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -314,7 +314,7 @@ static void devlink_release(struct work_struct *work) mutex_destroy(&devlink->lock); lockdep_unregister_key(&devlink->lock_key); put_device(devlink->dev); - kfree(devlink); + kvfree(devlink); } void devlink_put(struct devlink *devlink) @@ -420,7 +420,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, if (!devlink_reload_actions_valid(ops)) return NULL; - devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); + devlink = kvzalloc(struct_size(devlink, priv, priv_size), GFP_KERNEL); if (!devlink) return NULL; @@ -455,7 +455,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, return devlink; err_xa_alloc: - kfree(devlink); + kvfree(devlink); return NULL; } EXPORT_SYMBOL_GPL(devlink_alloc_ns); diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 19dbf540748ab..c609deb42e889 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -1202,17 +1202,13 @@ static void __devlink_compat_running_version(struct devlink *devlink, if (err) goto free_msg; - nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) { + nla_for_each_attr_type(nlattr, DEVLINK_ATTR_INFO_VERSION_RUNNING, + (void *)msg->data, msg->len, rem) { const struct nlattr *kv; int rem_kv; - if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING) - continue; - - nla_for_each_nested(kv, nlattr, rem_kv) { - if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE) - continue; - + nla_for_each_nested_type(kv, DEVLINK_ATTR_INFO_VERSION_VALUE, + nlattr, rem_kv) { strlcat(buf, nla_data(kv), len); strlcat(buf, " ", len); } diff --git a/net/ethtool/module.c b/net/ethtool/module.c index ceb575efc290c..9f63a276357ec 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -12,7 +12,7 @@ struct module_req_info { struct module_reply_data { struct ethnl_reply_data base; - struct ethtool_module_power_mode_params power; + struct ethtool_module_power_params power; }; #define MODULE_REPDATA(__reply_base) \ @@ -24,16 +24,16 @@ const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1] = { [ETHTOOL_A_MODULE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; -static int module_get_power_mode(struct net_device *dev, - struct module_reply_data *data, - struct netlink_ext_ack *extack) +static int module_get_power_cfg(struct net_device *dev, + struct module_reply_data *data, + struct netlink_ext_ack *extack) { const struct ethtool_ops *ops = dev->ethtool_ops; - if (!ops->get_module_power_mode) + if (!ops->get_module_power_cfg) return 0; - return ops->get_module_power_mode(dev, &data->power, extack); + return ops->get_module_power_cfg(dev, &data->power, extack); } static int module_prepare_data(const struct ethnl_req_info *req_base, @@ -48,7 +48,7 @@ static int module_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; - ret = module_get_power_mode(dev, data, info->extack); + ret = module_get_power_cfg(dev, data, info->extack); if (ret < 
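
The devlink change above swaps an open-coded sizeof(*devlink) + priv_size for struct_size(), which sizes a struct with a trailing flexible array and saturates on overflow, and kzalloc for kvzalloc, which can fall back to vmalloc when a driver asks for a large priv area. A standalone analog of the sizing (minus the overflow saturation struct_size() adds):

	#include <stdlib.h>
	#include <stddef.h>

	struct devlink_like {
		int index;
		char priv[];		/* flexible array member */
	};

	/* Userspace stand-in for kvzalloc(struct_size(devlink, priv, n), ...);
	 * the kernel macro additionally saturates instead of overflowing.
	 */
	static struct devlink_like *alloc_with_priv(size_t priv_size)
	{
		return calloc(1, offsetof(struct devlink_like, priv) + priv_size);
	}
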
0) goto out_complete; @@ -69,6 +69,15 @@ static int module_reply_size(const struct ethnl_req_info *req_base, if (data->power.mode) len += nla_total_size(sizeof(u8)); /* _MODULE_POWER_MODE */ + if (data->power.min_pwr_allowed) + len += nla_total_size(sizeof(u32)); /* _MIN_POWER_ALLOWED */ + + if (data->power.max_pwr_allowed) + len += nla_total_size(sizeof(u32)); /* _MAX_POWER_ALLOWED */ + + if (data->power.max_pwr_set) + len += nla_total_size(sizeof(u32)); /* _MAX_POWER_SET */ + return len; } @@ -77,6 +86,7 @@ static int module_fill_reply(struct sk_buff *skb, const struct ethnl_reply_data *reply_base) { const struct module_reply_data *data = MODULE_REPDATA(reply_base); + u32 temp; if (data->power.policy && nla_put_u8(skb, ETHTOOL_A_MODULE_POWER_MODE_POLICY, @@ -87,16 +97,30 @@ static int module_fill_reply(struct sk_buff *skb, nla_put_u8(skb, ETHTOOL_A_MODULE_POWER_MODE, data->power.mode)) return -EMSGSIZE; + temp = data->power.min_pwr_allowed; + if (temp && nla_put_u32(skb, ETHTOOL_A_MODULE_MIN_POWER_ALLOWED, temp)) + return -EMSGSIZE; + + temp = data->power.max_pwr_allowed; + if (temp && nla_put_u32(skb, ETHTOOL_A_MODULE_MAX_POWER_ALLOWED, temp)) + return -EMSGSIZE; + + temp = data->power.max_pwr_set; + if (temp && nla_put_u32(skb, ETHTOOL_A_MODULE_MAX_POWER_SET, temp)) + return -EMSGSIZE; + return 0; } /* MODULE_SET */ -const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1] = { +const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_MAX + 1] = { [ETHTOOL_A_MODULE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_MODULE_POWER_MODE_POLICY] = NLA_POLICY_RANGE(NLA_U8, ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH, ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO), + [ETHTOOL_A_MODULE_MAX_POWER_SET] = { .type = NLA_U32 }, + [ETHTOOL_A_MODULE_MAX_POWER_RESET] = { .type = NLA_U8 }, }; static int @@ -106,40 +130,78 @@ ethnl_set_module_validate(struct ethnl_req_info *req_info, const struct ethtool_ops *ops = req_info->dev->ethtool_ops; struct nlattr **tb = info->attrs; - if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]) + if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY] && + !tb[ETHTOOL_A_MODULE_MAX_POWER_SET] && + !tb[ETHTOOL_A_MODULE_MAX_POWER_RESET]) return 0; - if (!ops->get_module_power_mode || !ops->set_module_power_mode) { - NL_SET_ERR_MSG_ATTR(info->extack, - tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY], - "Setting power mode policy is not supported by this device"); + if (!ops->get_module_power_cfg || !ops->set_module_power_cfg) { + NL_SET_ERR_MSG(info->extack, "Setting power config is not supported by this device"); return -EOPNOTSUPP; } return 1; } +static void +ethnl_update_policy(enum ethtool_module_power_mode_policy *dst, + const struct nlattr *attr, bool *mod) +{ + u8 val = *dst; + + ethnl_update_u8(&val, attr, mod); + + if (*mod) + *dst = val; +} + static int ethnl_set_module(struct ethnl_req_info *req_info, struct genl_info *info) { - struct ethtool_module_power_mode_params power = {}; - struct ethtool_module_power_mode_params power_new; - const struct ethtool_ops *ops; + struct ethtool_module_power_params power = {}; + struct ethtool_module_power_params power_new; struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; + const struct ethtool_ops *ops; int ret; + bool mod = false; ops = dev->ethtool_ops; - power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]); - ret = ops->get_module_power_mode(dev, &power, info->extack); + ret = ops->get_module_power_cfg(dev, &power, info->extack); if (ret < 0) return ret; - if (power_new.policy ==
power.policy) + power_new.max_pwr_set = power.max_pwr_set; + power_new.policy = power.policy; + + ethnl_update_u32(&power_new.max_pwr_set, + tb[ETHTOOL_A_MODULE_MAX_POWER_SET], &mod); + + if (mod) { + if (power_new.max_pwr_set > power.max_pwr_allowed) { + NL_SET_ERR_MSG(info->extack, "Provided value is higher than maximum allowed"); + return -EINVAL; + } else if (power_new.max_pwr_set < power.min_pwr_allowed) { + NL_SET_ERR_MSG(info->extack, "Provided value is lower than minimum allowed"); + return -EINVAL; + } + } + + ethnl_update_policy(&power_new.policy, + tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY], &mod); + ethnl_update_u8(&power_new.max_pwr_reset, + tb[ETHTOOL_A_MODULE_MAX_POWER_RESET], &mod); + + if (!mod) return 0; - ret = ops->set_module_power_mode(dev, &power_new, info->extack); + if (power_new.max_pwr_reset && power_new.max_pwr_set) { + NL_SET_ERR_MSG(info->extack, "Maximum power set and reset cannot be used at the same time"); + return -EINVAL; + } + + ret = ops->set_module_power_cfg(dev, &power_new, info->extack); return ret < 0 ? ret : 1; } diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 9a333a8d04c1f..6282f84811ce5 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -432,7 +432,7 @@ extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_E extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1]; extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1]; extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1]; -extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1]; +extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_MAX + 1]; extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1]; extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1]; extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_CONTEXT + 1]; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c98b5b71ad7c3..e9d45133d6412 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -132,30 +132,29 @@ static int hsr_dev_open(struct net_device *dev) { struct hsr_priv *hsr; struct hsr_port *port; - char designation; + const char *designation = NULL; hsr = netdev_priv(dev); - designation = '\0'; hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: - designation = 'A'; + designation = "Slave A"; break; case HSR_PT_SLAVE_B: - designation = 'B'; + designation = "Slave B"; break; default: - designation = '?'; + designation = "Unknown"; } if (!is_slave_up(port->dev)) - netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n", + netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n", designation, port->dev->name); } - if (designation == '\0') + if (!designation) netdev_warn(dev, "No slave devices configured\n"); return 0; } diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 6dd960ec558cf..09b18ee6df003 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -338,7 +338,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; /* secret interval has been deprecated */ @@ -351,7 +350,6 @@ static struct ctl_table lowpan_frags_ctl_table[] = { .mode = 0644, .proc_handler =
proc_dointvec_jiffies, }, - { } }; static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) @@ -370,10 +368,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) goto err_alloc; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - table[0].procname = NULL; + if (net->user_ns != &init_user_ns) table_size = 0; - } } table[0].data = &ieee802154_lowpan->fqdir->high_thresh; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7a437f0d41905..6195cc5be1fcc 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2515,7 +2515,7 @@ static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; - struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; + struct ctl_table devinet_vars[IPV4_DEVCONF_MAX]; } devinet_sysctl = { .devinet_vars = { DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", @@ -2578,7 +2578,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, if (!t) goto out; - for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { + for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) { t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; t->devinet_vars[i].extra1 = p; t->devinet_vars[i].extra2 = net; @@ -2652,7 +2652,6 @@ static struct ctl_table ctl_forward_entry[] = { .extra1 = &ipv4_devconf, .extra2 = &init_net, }, - { }, }; #endif diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index d33d124218140..3d647c9a7a21e 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -114,7 +114,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) */ if (req->src != req->dst) for (sg = sg_next(req->src); sg; sg = sg_next(sg)) - skb_page_unref(skb, sg_page(sg), false); + skb_page_unref(skb, sg_page(sg)); } #ifdef CONFIG_INET_ESPINTCP diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c038e28e2f1e6..3b38610958ee4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -203,8 +203,15 @@ static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, kuid_t sk_uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { - if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) - return false; + if (ipv6_only_sock(sk2)) { + if (sk->sk_family == AF_INET) + return false; + +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return false; +#endif + } return inet_bind_conflict(sk, sk2, sk_uid, relax, reuseport_cb_ok, reuseport_ok); @@ -287,6 +294,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l struct sock_reuseport *reuseport_cb; struct inet_bind_hashbucket *head2; struct inet_bind2_bucket *tb2; + bool conflict = false; bool reuseport_cb_ok; rcu_read_lock(); @@ -299,18 +307,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l spin_lock(&head2->lock); - inet_bind_bucket_for_each(tb2, &head2->chain) - if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) - break; + inet_bind_bucket_for_each(tb2, &head2->chain) { + if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) + continue; - if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, - reuseport_ok)) { - spin_unlock(&head2->lock); - return true; + if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok)) + continue; + + conflict = true; + break; } spin_unlock(&head2->lock); - return false; + + return conflict; } /* diff --git 
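
The "- { }" deletions scattered through this series (appletalk, ax25, bridge, core, dccp, ipv4, ipv6, ...) are all one cleanup: ctl_table arrays lose their empty sentinel entry because registration now passes an explicit size (register_net_sysctl_sz(), as in the net/core hunk earlier), and loops are bounded by ARRAY_SIZE() rather than a NULL procname. In kernel-style shorthand:

	/* before: sentinel-terminated, implicitly sized */
	static struct ctl_table tbl[] = {
		{ .procname = "foo", /* ... */ },
		{ }				/* sentinel */
	};
	for (i = 0; tbl[i].procname; i++)
		/* ... */;

	/* after: explicitly sized, no sentinel */
	static struct ctl_table tbl[] = {
		{ .procname = "foo", /* ... */ },
	};
	for (i = 0; i < ARRAY_SIZE(tbl); i++)
		/* ... */;
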
a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index e8de45d34d56a..e28075f0006e3 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -264,14 +264,18 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) EXPORT_SYMBOL_GPL(__inet_twsk_schedule); /* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */ -void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) +void inet_twsk_purge(struct inet_hashinfo *hashinfo) { + struct inet_ehash_bucket *head = &hashinfo->ehash[0]; + unsigned int ehash_mask = hashinfo->ehash_mask; struct hlist_nulls_node *node; unsigned int slot; struct sock *sk; - for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { - struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; + for (slot = 0; slot <= ehash_mask; slot++, head++) { + if (hlist_nulls_empty(&head->chain)) + continue; + restart_rcu: cond_resched(); rcu_read_lock(); @@ -283,15 +287,13 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) TCPF_NEW_SYN_RECV)) continue; - if (sk->sk_family != family || - refcount_read(&sock_net(sk)->ns.count)) + if (refcount_read(&sock_net(sk)->ns.count)) continue; if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) continue; - if (unlikely(sk->sk_family != family || - refcount_read(&sock_net(sk)->ns.count))) { + if (refcount_read(&sock_net(sk)->ns.count)) { sock_gen_put(sk); goto restart; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fb947d1613fe2..cb1be83748c38 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -580,7 +580,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &dist_min, }, - { } }; /* secret interval has been deprecated */ @@ -593,7 +592,6 @@ static struct ctl_table ip4_frags_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; static int __net_init ip4_frags_ns_ctl_register(struct net *net) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7b16c211b9044..57ddcd8c62f67 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, tpi->flags | TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); } else { + if (unlikely(!pskb_may_pull(skb, + gre_hdr_len + sizeof(*ershdr)))) + return PACKET_REJECT; + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); ver = ershdr->ver; + iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags | TUNNEL_KEY, iph->saddr, iph->daddr, tpi->key); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c8f76f56dc165..deecfc0e5a91c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3509,7 +3509,6 @@ static struct ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static const char ipv4_route_flush_procname[] = "flush"; @@ -3543,7 +3542,6 @@ static struct ctl_table ipv4_route_netns_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { }, }; static __net_init int sysctl_route_net_init(struct net *net) @@ -3561,16 +3559,14 @@ static __net_init int sysctl_route_net_init(struct net *net) /* Don't export non-whitelisted sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) { - if (tbl[0].procname != ipv4_route_flush_procname) { - tbl[0].procname = NULL; + if (tbl[0].procname != ipv4_route_flush_procname) table_size = 0; - } } /* Update the variables to point into the current struct net * except for the first element flush */ - 
for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++) + for (i = 1; i < table_size; i++) tbl[i].data += (void *)net - (void *)&init_net; } tbl[0].extra1 = net; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7e4f16a7dcc17..c8dae8fc682a0 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -575,7 +575,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &sysctl_fib_sync_mem_min, .extra2 = &sysctl_fib_sync_mem_max, }, - { } }; static struct ctl_table ipv4_net_table[] = { @@ -1502,12 +1501,12 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ONE, }, - { } }; static __net_init int ipv4_sysctl_init_net(struct net *net) { struct ctl_table *table; + size_t table_size = ARRAY_SIZE(ipv4_net_table); table = ipv4_net_table; if (!net_eq(net, &init_net)) { @@ -1517,7 +1516,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!table) goto err_alloc; - for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { + for (i = 0; i < table_size; i++) { if (table[i].data) { /* Update the variables to point into * the current struct net @@ -1533,7 +1532,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) } net->ipv4.ipv4_hdr = register_net_sysctl_sz(net, "net/ipv4", table, - ARRAY_SIZE(ipv4_net_table)); + table_size); if (!net->ipv4.ipv4_hdr) goto err_reg; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5d874817a78db..1b6cd38400120 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6999,7 +6999,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc); /* * Return true if a syncookie should be sent */ -static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) +static bool tcp_syn_flood_action(struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a22ee58387518..1e0a9762f92e6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3501,7 +3501,7 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { struct net *net; - tcp_twsk_purge(net_exit_list, AF_INET); + tcp_twsk_purge(net_exit_list); list_for_each_entry(net, net_exit_list, exit_list) { inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f0761f060a837..5b21a07ddf9aa 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -388,7 +388,7 @@ void tcp_twsk_destructor(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); -void tcp_twsk_purge(struct list_head *net_exit_list, int family) +void tcp_twsk_purge(struct list_head *net_exit_list) { bool purged_once = false; struct net *net; @@ -396,9 +396,9 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family) list_for_each_entry(net, net_exit_list, exit_list) { if (net->ipv4.tcp_death_row.hashinfo->pernet) { /* Even if tw_refcount == 1, we must clean up kernel reqsk */ - inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family); + inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo); } else if (!purged_once) { - inet_twsk_purge(&tcp_hashinfo, family); + inet_twsk_purge(&tcp_hashinfo); purged_once = true; } } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d1ad20ce1c8c7..976db57b95d40 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -25,7 +25,7 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { - struct inet_connection_sock 
*icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); u32 elapsed, user_timeout; s32 remaining; @@ -47,7 +47,7 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); u32 remaining, user_timeout; s32 elapsed; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 661d0e0d273f6..7613daa339b0b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -582,6 +582,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +EXPORT_SYMBOL(udp_encap_needed_key); + +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +EXPORT_SYMBOL(udpv6_encap_needed_key); +#endif + void udp_encap_enable(void) { static_branch_inc(&udp_encap_needed_key); @@ -1492,13 +1499,15 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) struct sk_buff_head *list = &sk->sk_receive_queue; int rmem, err = -ENOMEM; spinlock_t *busy = NULL; - int size; + bool becomes_readable; + int size, rcvbuf; - /* try to avoid the costly atomic add/sub pair when the receive - * queue is full; always allow at least a packet + /* Immediately drop when the receive queue is full. + * Always allow at least one packet. */ rmem = atomic_read(&sk->sk_rmem_alloc); - if (rmem > sk->sk_rcvbuf) + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + if (rmem > rcvbuf) goto drop; /* Under mem pressure, it might be helpful to help udp_recvmsg() @@ -1507,7 +1516,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) * - Less cache line misses at copyout() time * - Less work at consume_skb() (less alien page frag freeing) */ - if (rmem > (sk->sk_rcvbuf >> 1)) { + if (rmem > (rcvbuf >> 1)) { skb_condense(skb); busy = busylock_acquire(sk); @@ -1515,12 +1524,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) size = skb->truesize; udp_set_dev_scratch(skb); - /* we drop only if the receive buf is full and the receive - * queue contains some other skb - */ - rmem = atomic_add_return(size, &sk->sk_rmem_alloc); - if (rmem > (size + (unsigned int)sk->sk_rcvbuf)) - goto uncharge_drop; + atomic_add(size, &sk->sk_rmem_alloc); spin_lock(&list->lock); err = udp_rmem_schedule(sk, size); @@ -1536,12 +1540,19 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) */ sock_skb_set_dropcount(sk, skb); + becomes_readable = skb_queue_empty(list); __skb_queue_tail(list, skb); spin_unlock(&list->lock); - if (!sock_flag(sk, SOCK_DEAD)) - INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk); - + if (!sock_flag(sk, SOCK_DEAD)) { + if (becomes_readable || + sk->sk_data_ready != sock_def_readable || + READ_ONCE(sk->sk_peek_off) >= 0) + INDIRECT_CALL_1(sk->sk_data_ready, + sock_def_readable, sk); + else + sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN); + } busylock_release(busy); return 0; @@ -2049,8 +2060,8 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) drop_reason = SKB_DROP_REASON_PROTO_MEM; } UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + trace_udp_fail_queue_rcv_skb(rc, sk, skb); kfree_skb_reason(skb, drop_reason); - trace_udp_fail_queue_rcv_skb(rc, sk); return -1; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b9880743765c6..3498dd1d0694d 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -449,8 
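
The __udp_enqueue_schedule_skb() rework above also coalesces wakeups: only the enqueue that flips the queue from empty to non-empty takes the full sk_data_ready path, while later enqueues fall back to the cheaper async wakeup (unless a custom data-ready hook or a peek offset forces the full path). The idea, sketched with hypothetical names:

	#include <stdbool.h>

	struct rx_queue;				/* all names hypothetical */
	bool queue_is_empty(struct rx_queue *q);
	void enqueue_locked(struct rx_queue *q, void *pkt);
	void wake_readers(struct rx_queue *q);		/* full data-ready path */
	void wake_async_only(struct rx_queue *q);	/* cheap SIGIO-style wakeup */

	void deliver(struct rx_queue *q, void *pkt)
	{
		/* Sampled under the queue lock in the real code. */
		bool becomes_readable = queue_is_empty(q);

		enqueue_locked(q, pkt);

		/* Readers woken on the empty->non-empty edge will drain
		 * everything queued afterwards, so that edge is the only
		 * point needing the expensive wakeup.
		 */
		if (becomes_readable)
			wake_readers(q);
		else
			wake_async_only(q);
	}
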
+449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) NAPI_GRO_CB(p)->count++; p->data_len += skb->len; - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; p->truesize += skb->truesize; p->len += skb->len; @@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - /* we can do L4 aggregation only if the packet can't land in a tunnel - * otherwise we could corrupt the inner stream + /* We can do L4 aggregation only if the packet can't land in a tunnel + * otherwise we could corrupt the inner stream. Detecting such packets + * cannot be foolproof and the aggregation might still happen in some + * cases. Such packets should be caught in udp_unexpected_gso later. */ NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { + /* If the packet was locally encapsulated in a UDP tunnel that + * wasn't detected above, do not GRO. + */ + if (skb->encapsulation) + goto out; + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1; @@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c33bca2c38415..4c74fec034c5d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -152,7 +152,6 @@ static struct ctl_table xfrm4_policy_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static __net_init int xfrm4_net_sysctl_init(struct net *net) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 92db9b474f2bd..841315de5cf25 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7182,9 +7182,6 @@ static const struct ctl_table addrconf_sysctl[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, - { - /* sentinel */ - } }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, @@ -7198,7 +7195,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, if (!table) goto out; - for (i = 0; table[i].data; i++) { + for (i = 0; i < ARRAY_SIZE(addrconf_sysctl); i++) { table[i].data += (char *)p - (char *)&ipv6_devconf; /* If one of these is already set, then it is not safe to * overwrite either of them: this makes proc_dointvec_minmax diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 7371886d4f9f4..fe8d53f5a5eec 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -131,7 +131,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) */ if (req->src != req->dst) for (sg = sg_next(req->src); sg; sg = sg_next(sg)) - skb_page_unref(skb, sg_page(sg), false); + skb_page_unref(skb, sg_page(sg)); } #ifdef CONFIG_INET6_ESPINTCP diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1635da07285f2..91cbf8e8009fe 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -1206,7 +1206,6 @@ static struct ctl_table ipv6_icmp_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - { }, }; struct ctl_table * 
__net_init ipv6_icmp_sysctl_init(struct net *net) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5c558dc1c6838..284002a2a8905 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -623,23 +623,22 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true, .filter.dump_routes = true, - .filter.rtnl_held = true, + .filter.rtnl_held = false, }; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - unsigned int h, s_h; unsigned int e = 0, s_e; + struct hlist_head *head; struct fib6_walker *w; struct fib6_table *tb; - struct hlist_head *head; - int res = 0; + unsigned int h, s_h; + int err = 0; + rcu_read_lock(); if (cb->strict_check) { - int err; - err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb); if (err < 0) - return err; + goto unlock; } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { struct rtmsg *rtm = nlmsg_data(nlh); @@ -660,8 +659,10 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) * 2. allocate and initialize walker. */ w = kzalloc(sizeof(*w), GFP_ATOMIC); - if (!w) - return -ENOMEM; + if (!w) { + err = -ENOMEM; + goto unlock; + } w->func = fib6_dump_node; cb->args[2] = (long)w; } @@ -675,46 +676,46 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) tb = fib6_get_table(net, arg.filter.table_id); if (!tb) { if (rtnl_msg_family(cb->nlh) != PF_INET6) - goto out; + goto unlock; NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); - return -ENOENT; + err = -ENOENT; + goto unlock; } if (!cb->args[0]) { - res = fib6_dump_table(tb, skb, cb); - if (!res) + err = fib6_dump_table(tb, skb, cb); + if (!err) cb->args[0] = 1; } - goto out; + goto unlock; } s_h = cb->args[0]; s_e = cb->args[1]; - rcu_read_lock(); for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (e < s_e) goto next; - res = fib6_dump_table(tb, skb, cb); - if (res != 0) - goto out_unlock; + err = fib6_dump_table(tb, skb, cb); + if (err != 0) + goto out; next: e++; } } -out_unlock: - rcu_read_unlock(); +out: cb->args[1] = e; cb->args[0] = h; -out: - res = res < 0 ? 
res : skb->len; - if (res <= 0) + +unlock: + rcu_read_unlock(); + if (err <= 0) fib6_dump_end(cb); - return res; + return err; } void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) @@ -2506,7 +2507,7 @@ int __init fib6_init(void) goto out_kmem_cache_create; ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL, - inet6_dump_fib, 0); + inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED); if (ret) goto out_unregister_subsys; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ca7e77e842835..c89aef524df9a 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb, struct ip6_tnl *tunnel; u8 ver; + if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) + return PACKET_REJECT; + ipv6h = ipv6_hdr(skb); ershdr = (struct erspan_base_hdr *)skb->data; ver = ershdr->ver; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d0dcbaca19943..abf6e24ae4ecf 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -62,7 +62,6 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, - { } }; static int nf_ct_frag6_sysctl_register(struct net *net) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index acb4f119e11f0..afb343cb77ac6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -436,7 +436,6 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; /* secret interval has been deprecated */ @@ -449,7 +448,6 @@ static struct ctl_table ip6_frags_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; static int __net_init ip6_frags_ns_sysctl_register(struct net *net) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1f4b935a0e57a..b49137c3031b2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6428,7 +6428,6 @@ static struct ctl_table ipv6_route_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - { } }; struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) @@ -6452,10 +6451,6 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[1].procname = NULL; } return table; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 888676163e900..9f40bb12fbdd5 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -213,7 +213,6 @@ static struct ctl_table ipv6_table_template[] = { .proc_handler = proc_doulongvec_minmax, .extra2 = &ioam6_id_wide_max, }, - { } }; static struct ctl_table ipv6_rotable[] = { @@ -248,7 +247,6 @@ static struct ctl_table ipv6_rotable[] = { .proc_handler = proc_dointvec, }, #endif /* CONFIG_NETLABEL */ - { } }; static int __net_init ipv6_sysctl_net_init(struct net *net) @@ -264,7 +262,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) if (!ipv6_table) goto out; /* Update the variables to point into the current struct net */ - for (i = 0; i < ARRAY_SIZE(ipv6_table_template) - 1; i++) + for (i = 0; i < ARRAY_SIZE(ipv6_table_template); i++) ipv6_table[i].data += (void *)net - (void *)&init_net; ipv6_route_table = ipv6_route_sysctl_init(net); diff --git a/net/ipv6/tcp_ipv6.c 
b/net/ipv6/tcp_ipv6.c index 3f4cba49e9ee6..5ae74f661d25f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2389,15 +2389,9 @@ static void __net_exit tcpv6_net_exit(struct net *net) inet_ctl_sock_destroy(net->ipv6.tcp_sk); } -static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) -{ - tcp_twsk_purge(net_exit_list, AF_INET6); -} - static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, - .exit_batch = tcpv6_net_exit_batch, }; int __init tcpv6_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7c1e6469d091d..a9466f881f1b5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -447,7 +448,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto try_again; } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_inc(&udpv6_encap_needed_key); @@ -658,8 +659,8 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) drop_reason = SKB_DROP_REASON_PROTO_MEM; } UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + trace_udp_fail_queue_rcv_skb(rc, sk, skb); kfree_skb_reason(skb, drop_reason); - trace_udp_fail_queue_rcv_skb(rc, sk); return -1; } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 312bcaeea96fb..bbd347de00b45 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 42fb6996b0777..499b5f5c19fc3 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -184,7 +184,6 @@ static struct ctl_table xfrm6_policy_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static int __net_init xfrm6_net_sysctl_init(struct net *net) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 7c8c3adcac6e9..c951bb9cc2e04 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -184,7 +184,7 @@ static void iucv_sock_wake_msglim(struct sock *sk) wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index a4ab615ca3e3e..b51f46ec32f95 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -520,14 +520,19 @@ static void iucv_setmask_mp(void) */ static void iucv_setmask_up(void) { - cpumask_t cpumask; + cpumask_var_t cpumask; int cpu; + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) + return; + /* Disable all cpu but the first in cpu_irq_cpumask. 
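 * Only the first cpu in iucv_irq_cpumask keeps accepting IUCV
 * interrupts; iucv_block_cpu() is run on every other cpu in the copy.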
*/ - cpumask_copy(&cpumask, &iucv_irq_cpumask); - cpumask_clear_cpu(cpumask_first(&iucv_irq_cpumask), &cpumask); - for_each_cpu(cpu, &cpumask) + cpumask_copy(cpumask, &iucv_irq_cpumask); + cpumask_clear_cpu(cpumask_first(&iucv_irq_cpumask), cpumask); + for_each_cpu(cpu, cpumask) smp_call_function_single(cpu, iucv_block_cpu, NULL, 1); + + free_cpumask_var(cpumask); } /* @@ -628,23 +633,33 @@ static int iucv_cpu_online(unsigned int cpu) static int iucv_cpu_down_prep(unsigned int cpu) { - cpumask_t cpumask; + cpumask_var_t cpumask; + int ret = 0; if (!iucv_path_table) return 0; - cpumask_copy(&cpumask, &iucv_buffer_cpumask); - cpumask_clear_cpu(cpu, &cpumask); - if (cpumask_empty(&cpumask)) + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(cpumask, &iucv_buffer_cpumask); + cpumask_clear_cpu(cpu, cpumask); + if (cpumask_empty(cpumask)) { /* Can't offline last IUCV enabled cpu. */ - return -EINVAL; + ret = -EINVAL; + goto __free_cpumask; + } iucv_retrieve_cpu(NULL); if (!cpumask_empty(&iucv_irq_cpumask)) - return 0; + goto __free_cpumask; + smp_call_function_single(cpumask_first(&iucv_buffer_cpumask), iucv_allow_cpu, NULL, 1); - return 0; + +__free_cpumask: + free_cpumask_var(cpumask); + return ret; } /** diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 8443a6d841b06..72e101135f8cb 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -44,11 +44,6 @@ static struct ctl_table llc2_timeout_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { }, -}; - -static struct ctl_table llc_station_table[] = { - { }, }; static struct ctl_table_header *llc2_timeout_header; @@ -56,8 +51,9 @@ static struct ctl_table_header *llc_station_header; int __init llc_sysctl_init(void) { + struct ctl_table empty[1] = {}; llc2_timeout_header = register_net_sysctl(&init_net, "net/llc/llc2/timeout", llc2_timeout_table); - llc_station_header = register_net_sysctl(&init_net, "net/llc/station", llc_station_table); + llc_station_header = register_net_sysctl_sz(&init_net, "net/llc/station", empty, 0); if (!llc2_timeout_header || !llc_station_header) { llc_sysctl_exit(); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 6dab883a08dda..e163fac55ffad 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1393,7 +1393,6 @@ static const struct ctl_table mpls_dev_table[] = { .proc_handler = mpls_conf_proc, .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), }, - { } }; static int mpls_dev_sysctl_register(struct net_device *dev, @@ -2689,7 +2688,7 @@ static int mpls_net_init(struct net *net) /* Table data contains only offsets relative to the base of * the mdev at this point, so make them absolute. 
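 * (the loop below rebases each table[i].data offset by the address
 * of the current struct net).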
*/ - for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++) + for (i = 0; i < ARRAY_SIZE(mpls_table); i++) table[i].data = (char *)net + (uintptr_t)table[i].data; net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table, diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 13fe0748dde83..8bf7c26a08780 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -156,7 +156,6 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - {} }; static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3a1967bc7bad6..7e74b812e366a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3937,8 +3937,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, mptcp_set_state(newsk, TCP_CLOSE); } } else { - MPTCP_INC_STATS(sock_net(ssk), - MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); tcpfallback: newsk->sk_kern_sock = kern; lock_sock(newsk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a10ebf3ee10a1..46f4655b71236 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -558,7 +558,7 @@ struct mptcp_subflow_context { static inline struct mptcp_subflow_context * mptcp_subflow_ctx(const struct sock *sk) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); /* Use RCU on icsk_ulp_data only for sock diag code */ return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index dcd1c76d2a3ba..73fdf423de44e 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1493,6 +1493,10 @@ int mptcp_set_rcvlowat(struct sock *sk, int val) struct mptcp_subflow_context *subflow; int space, cap; + /* bpf can land here with a wrong sk type */ + if (sk->sk_protocol == IPPROTO_TCP) + return -EINVAL; + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1626dd20c68f1..6042a47da61be 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -905,6 +905,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, return child; fallback: + if (fallback) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); mptcp_subflow_drop_ctx(child); return child; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 143a341bbc0a4..50b5dbe40eb85 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2263,7 +2263,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_dointvec, }, #endif - { } }; #endif @@ -4286,10 +4285,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) return -ENOMEM; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - tbl[0].procname = NULL; + if (net->user_ns != &init_user_ns) ctl_table_size = 0; - } } else tbl = vs_vars; /* Initialize sysctl defaults */ diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 8ceec7a2fa8f3..2423513d701d4 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -123,7 +123,6 @@ static struct ctl_table vs_vars_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; #endif @@ -563,10 +562,8 @@ static int __net_init __ip_vs_lblc_init(struct net *net) return -ENOMEM; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - 
ipvs->lblc_ctl_table[0].procname = NULL; + if (net->user_ns != &init_user_ns) vars_table_size = 0; - } } else ipvs->lblc_ctl_table = vs_vars_table; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 0fb64707213f8..cdb1d4bf6761c 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -294,7 +294,6 @@ static struct ctl_table vs_vars_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; #endif @@ -749,10 +748,8 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) return -ENOMEM; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - ipvs->lblcr_ctl_table[0].procname = NULL; + if (net->user_ns != &init_user_ns) vars_table_size = 0; - } } else ipvs->lblcr_ctl_table = vs_vars_table; ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0ee98ce5b8165..2f226cfb32d04 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -616,11 +616,9 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_LWTUNNEL, #endif - __NF_SYSCTL_CT_LAST_SYSCTL, + NF_SYSCTL_CT_LAST_SYSCTL, }; -#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1) - static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_MAX] = { .procname = "nf_conntrack_max", @@ -957,7 +955,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .proc_handler = nf_hooks_lwtunnel_sysctl_handler, }, #endif - {} }; static struct ctl_table nf_ct_netfilter_table[] = { @@ -968,7 +965,6 @@ static struct ctl_table nf_ct_netfilter_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 370f8231385ca..d42ba733496b2 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -395,7 +395,7 @@ static const struct seq_operations nflog_seq_ops = { #ifdef CONFIG_SYSCTL static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; -static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; +static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO]; static struct ctl_table_header *nf_log_sysctl_fhdr; static struct ctl_table nf_log_sysctl_ftable[] = { @@ -406,7 +406,6 @@ static struct ctl_table nf_log_sysctl_ftable[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static int nf_log_proc_dostring(struct ctl_table *table, int write, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7554803218a25..dc8c3c01d51b7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -73,6 +72,7 @@ #include #include "af_netlink.h" +#include "genetlink.h" struct listeners { struct rcu_head rcu; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 3b7666944b11c..feb54c63a1165 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -22,6 +22,8 @@ #include #include +#include "genetlink.h" + static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ static DECLARE_RWSEM(cb_lock); diff --git a/net/netlink/genetlink.h b/net/netlink/genetlink.h new file mode 100644 index 0000000000000..89bd9d2631c38 --- /dev/null +++ b/net/netlink/genetlink.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_GENETLINK_H +#define __NET_GENETLINK_H + +#include + +/* for 
synchronisation between af_netlink and genetlink */ +extern atomic_t genl_sk_destructing_cnt; +extern wait_queue_head_t genl_sk_destructing_waitq; + +#endif /* __LINUX_GENERIC_NETLINK_H */ diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 79fb2d3f477b5..7dc0fa628f2e7 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -140,7 +140,6 @@ static struct ctl_table nr_table[] = { .extra1 = &min_reset, .extra2 = &max_reset }, - { } }; int __init nr_register_sysctl(void) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 11c69415c6052..99d72543abd3a 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h index ed11cd12b512f..8bbf983cd2441 100644 --- a/net/openvswitch/meter.h +++ b/net/openvswitch/meter.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 0d0bf41381c22..82fc22467a095 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -81,7 +81,6 @@ static struct ctl_table phonet_table[] = { .mode = 0644, .proc_handler = proc_local_port_range, }, - { } }; int __init phonet_sysctl_init(void) diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c index e4e41b3afce71..2af678e71e3c6 100644 --- a/net/rds/ib_sysctl.c +++ b/net/rds/ib_sysctl.c @@ -103,7 +103,6 @@ static struct ctl_table rds_ib_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; void rds_ib_sysctl_exit(void) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index a4e3c5de998be..00dbcd4d28e68 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, } ret = PTR_ERR(trans_private); /* Trigger connection so that its ready for the next retry */ - if (ret == -ENODEV) + if (ret == -ENODEV && cp) rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index e381bbcd9cc1c..025f518a43495 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -89,7 +89,6 @@ static struct ctl_table rds_sysctl_rds_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; void rds_sysctl_exit(void) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 2dba7505b4148..d8111ac83bb6a 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -86,7 +86,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = { .proc_handler = rds_tcp_skbuf_handler, .extra1 = &rds_tcp_min_rcvbuf, }, - { } }; u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index d391d7758f52a..d801315b70839 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -112,7 +112,6 @@ static struct ctl_table rose_table[] = { .extra1 = &min_window, .extra2 = &max_window }, - { } }; void __init rose_register_sysctl(void) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 5222bc97d192e..f4844683e1203 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk) if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index c9bedd0e2d867..9bf9a1f6e4cb2 100644 --- a/net/rxrpc/sysctl.c +++ 
b/net/rxrpc/sysctl.c @@ -127,7 +127,6 @@ static struct ctl_table rxrpc_sysctl_table[] = { .extra1 = (void *)SYSCTL_ONE, .extra2 = (void *)&four, }, - { } }; int __init rxrpc_sysctl_init(void) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ca5676b2668e1..db06539936323 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -410,12 +410,48 @@ static void tcf_proto_get(struct tcf_proto *tp) refcount_inc(&tp->refcnt); } +static void tcf_maintain_bypass(struct tcf_block *block) +{ + int filtercnt = atomic_read(&block->filtercnt); + int skipswcnt = atomic_read(&block->skipswcnt); + bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt; + + if (bypass_wanted != block->bypass_wanted) { +#ifdef CONFIG_NET_CLS_ACT + if (bypass_wanted) + static_branch_inc(&tcf_bypass_check_needed_key); + else + static_branch_dec(&tcf_bypass_check_needed_key); +#endif + block->bypass_wanted = bypass_wanted; + } +} + +static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add) +{ + lockdep_assert_not_held(&block->cb_lock); + + down_write(&block->cb_lock); + if (*counted != add) { + if (add) { + atomic_inc(&block->filtercnt); + *counted = true; + } else { + atomic_dec(&block->filtercnt); + *counted = false; + } + } + tcf_maintain_bypass(block); + up_write(&block->cb_lock); +} + static void tcf_chain_put(struct tcf_chain *chain); static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, bool sig_destroy, struct netlink_ext_ack *extack) { tp->ops->destroy(tp, rtnl_held, extack); + tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false); if (sig_destroy) tcf_proto_signal_destroyed(tp->chain, tp); tcf_chain_put(tp->chain); @@ -2364,6 +2400,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, flags, extack); if (err == 0) { + tcf_block_filter_cnt_update(block, &tp->counted, true); tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held, extack); tfilter_put(tp, fh); @@ -3483,6 +3520,8 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) if (*flags & TCA_CLS_FLAGS_IN_HW) return; *flags |= TCA_CLS_FLAGS_IN_HW; + if (tc_skip_sw(*flags)) + atomic_inc(&block->skipswcnt); atomic_inc(&block->offloadcnt); } @@ -3491,6 +3530,8 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) if (!(*flags & TCA_CLS_FLAGS_IN_HW)) return; *flags &= ~TCA_CLS_FLAGS_IN_HW; + if (tc_skip_sw(*flags)) + atomic_dec(&block->skipswcnt); atomic_dec(&block->offloadcnt); } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 225353fbb3f15..51d4013b61219 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -215,10 +215,8 @@ static int mqprio_parse_tc_entries(struct Qdisc *sch, struct nlattr *nlattr_opt, for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) fp[tc] = priv->fp[tc]; - nla_for_each_attr(n, nlattr_opt, nlattr_opt_len, rem) { - if (nla_type(n) != TCA_MQPRIO_TC_ENTRY) - continue; - + nla_for_each_attr_type(n, TCA_MQPRIO_TC_ENTRY, nlattr_opt, + nlattr_opt_len, rem) { err = mqprio_parse_tc_entry(fp, n, &seen_tcs, extack); if (err) goto out; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index a0d54b422186f..1ab17e8a72605 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1752,10 +1752,7 @@ static int taprio_parse_tc_entries(struct Qdisc *sch, fp[tc] = q->fp[tc]; } - nla_for_each_nested(n, opt, rem) { - if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY) - continue; - + 
nla_for_each_nested_type(n, TCA_TAPRIO_ATTR_TC_ENTRY, opt, rem) { err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs, extack); if (err) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c67679a41044f..e416b6d3d2705 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9276,7 +9276,7 @@ void sctp_data_ready(struct sock *sk) if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLRDNORM | EPOLLRDBAND); - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); } diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index f65d6f92afcbc..6894072af210c 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -80,8 +80,6 @@ static struct ctl_table sctp_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - - { /* sentinel */ } }; /* The following index defines are used in sctp_sysctl_net_register(). @@ -384,8 +382,6 @@ static struct ctl_table sctp_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &pf_expose_max, }, - - { /* sentinel */ } }; static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, @@ -604,7 +600,7 @@ int sctp_sysctl_net_register(struct net *net) if (!table) return -ENOMEM; - for (i = 0; table[i].data; i++) + for (i = 0; i < ARRAY_SIZE(sctp_net_table); i++) table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 4b52b3b159c0e..e8dcd28a554ce 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -177,7 +177,7 @@ static struct smc_hashinfo smc_v6_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock), }; -int smc_hash_sk(struct sock *sk) +static int smc_hash_sk(struct sock *sk) { struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; struct hlist_head *head; @@ -191,9 +191,8 @@ int smc_hash_sk(struct sock *sk) return 0; } -EXPORT_SYMBOL_GPL(smc_hash_sk); -void smc_unhash_sk(struct sock *sk) +static void smc_unhash_sk(struct sock *sk) { struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; @@ -202,7 +201,6 @@ void smc_unhash_sk(struct sock *sk) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock_bh(&h->lock); } -EXPORT_SYMBOL_GPL(smc_unhash_sk); /* This will be called before user really release sock_lock. So do the * work which we didn't do because of user hold the sock_lock in the diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index e55026c7529cf..33fa787c28ebb 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -853,8 +853,10 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) pclc_smcd = &pclc->pclc_smcd; pclc_prfx = &pclc->pclc_prfx; ipv6_prfx = pclc->pclc_prfx_ipv6; - v2_ext = &pclc->pclc_v2_ext; - smcd_v2_ext = &pclc->pclc_smcd_v2_ext; + v2_ext = container_of(&pclc->pclc_v2_ext, + struct smc_clc_v2_extension, fixed); + smcd_v2_ext = container_of(&pclc->pclc_smcd_v2_ext, + struct smc_clc_smcd_v2_extension, fixed); gidchids = pclc->pclc_gidchids; trl = &pclc->pclc_trl; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 7cc7070b9772d..467effb50cd65 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -134,12 +134,15 @@ struct smc_clc_smcd_gid_chid { */ struct smc_clc_v2_extension { - struct smc_clnt_opts_area_hdr hdr; - u8 roce[16]; /* RoCEv2 GID */ - u8 max_conns; - u8 max_links; - __be16 feature_mask; - u8 reserved[12]; + /* New members must be added within the struct_group() macro below. 
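+ * A struct_group_tagged() mirror type, struct smc_clc_v2_extension_fixed,
+ * is embedded in struct smc_clc_msg_proposal_area, so members added
+ * outside the group would fall beyond that fixed-size copy.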
*/ + struct_group_tagged(smc_clc_v2_extension_fixed, fixed, + struct smc_clnt_opts_area_hdr hdr; + u8 roce[16]; /* RoCEv2 GID */ + u8 max_conns; + u8 max_links; + __be16 feature_mask; + u8 reserved[12]; + ); u8 user_eids[][SMC_MAX_EID_LEN]; }; @@ -159,8 +162,11 @@ struct smc_clc_msg_smcd { /* SMC-D GID information */ }; struct smc_clc_smcd_v2_extension { - u8 system_eid[SMC_MAX_EID_LEN]; - u8 reserved[16]; + /* New members must be added within the struct_group() macro below. */ + struct_group_tagged(smc_clc_smcd_v2_extension_fixed, fixed, + u8 system_eid[SMC_MAX_EID_LEN]; + u8 reserved[16]; + ); struct smc_clc_smcd_gid_chid gidchid[]; }; @@ -183,9 +189,9 @@ struct smc_clc_msg_proposal_area { struct smc_clc_msg_smcd pclc_smcd; struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX]; - struct smc_clc_v2_extension pclc_v2_ext; + struct smc_clc_v2_extension_fixed pclc_v2_ext; u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN]; - struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext; + struct smc_clc_smcd_v2_extension_fixed pclc_smcd_v2_ext; struct smc_clc_smcd_gid_chid pclc_gidchids[SMCD_CLC_MAX_V2_GID_ENTRIES]; struct smc_clc_msg_trail pclc_trl; diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 9a2f3638d161d..f0cbe77a80b44 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -42,10 +42,10 @@ static void smc_rx_wake_up(struct sock *sk) if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | EPOLLRDNORM | EPOLLRDBAND); - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN); if ((sk->sk_shutdown == SHUTDOWN_MASK) || (sk->sk_state == SMC_CLOSED)) - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); + sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_HUP); rcu_read_unlock(); } diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index a5946d1b9d60c..bd0b7e2f88245 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -90,7 +90,6 @@ static struct ctl_table smc_table[] = { .extra1 = &conns_per_lgr_min, .extra2 = &conns_per_lgr_max, }, - { } }; int __net_init smc_sysctl_net_init(struct net *net) diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 93941ab12549f..5f3170a1c9bbe 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -160,7 +160,6 @@ static struct ctl_table debug_table[] = { .mode = 0444, .proc_handler = proc_do_xprt, }, - { } }; void diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index f86970733eb0a..474f7a98fe9ee 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -209,7 +209,6 @@ static struct ctl_table svcrdma_parm_table[] = { .extra1 = &zero, .extra2 = &zero, }, - { }, }; static void svc_rdma_proc_cleanup(void) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 29b0562d62e75..9a8ce5df83cad 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -137,7 +137,6 @@ static struct ctl_table xr_tunables_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { }, }; #endif diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bb9b747d58a1a..f62f7b65455b8 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -160,7 +160,6 @@ static struct ctl_table xs_tunables_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { }, }; /* diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 9fb65c988f7f3..30d2e06e3d8c7 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -91,7 +91,6 
@@ static struct ctl_table tipc_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, - {} }; int tipc_register_sysctl(void) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5b41e2321209a..533fb682c9547 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -546,7 +546,7 @@ static void unix_write_space(struct sock *sk) if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } @@ -979,11 +979,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); - u->inflight = 0; + u->listener = NULL; + u->vertex = NULL; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ init_waitqueue_head(&u->peer_wait); @@ -1597,6 +1597,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, newsk->sk_type = sk->sk_type; init_peercred(newsk); newu = unix_sk(newsk); + newu->listener = other; RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); @@ -1692,8 +1693,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { struct sock *sk = sock->sk; - struct sock *tsk; struct sk_buff *skb; + struct sock *tsk; int err; err = -EOPNOTSUPP; @@ -1718,6 +1719,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, } tsk = skb->sk; + unix_update_edges(unix_sk(tsk)); skb_free_datagram(sk, skb); wake_up_interruptible(&unix_sk(sk)->peer_wait); @@ -1789,8 +1791,6 @@ static inline bool too_many_unix_fds(struct task_struct *p) static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { - int i; - if (too_many_unix_fds(current)) return -ETOOMANYREFS; @@ -1802,21 +1802,18 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->user, scm->fp->fp[i]); + if (unix_prepare_fpl(UNIXCB(skb).fp)) + return -ENOMEM; return 0; } static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) { - int i; - scm->fp = UNIXCB(skb).fp; UNIXCB(skb).fp = NULL; - for (i = scm->fp->count - 1; i >= 0; i--) - unix_notinflight(scm->fp->user, scm->fp->fp[i]); + unix_destroy_fpl(scm->fp); } static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1937,8 +1934,10 @@ static void scm_stat_add(struct sock *sk, struct sk_buff *skb) struct scm_fp_list *fp = UNIXCB(skb).fp; struct unix_sock *u = unix_sk(sk); - if (unlikely(fp && fp->count)) + if (unlikely(fp && fp->count)) { atomic_add(fp->count, &u->scm_stat.nr_fds); + unix_add_edges(fp, u); + } } static void scm_stat_del(struct sock *sk, struct sk_buff *skb) @@ -1946,8 +1945,10 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb) struct scm_fp_list *fp = UNIXCB(skb).fp; struct unix_sock *u = unix_sk(sk); - if (unlikely(fp && fp->count)) + if (unlikely(fp && fp->count)) { atomic_sub(fp->count, &u->scm_stat.nr_fds); + unix_del_edges(fp); + } } /* diff --git a/net/unix/garbage.c b/net/unix/garbage.c index fa39b62652385..89ea71d9297ba 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -101,261 +101,468 @@ struct unix_sock 
*unix_get_socket(struct file *filp) return NULL; } +static struct unix_vertex *unix_edge_successor(struct unix_edge *edge) +{ + /* If an embryo socket has a fd, + * the listener indirectly holds the fd's refcnt. + */ + if (edge->successor->listener) + return unix_sk(edge->successor->listener)->vertex; + + return edge->successor->vertex; +} + +static bool unix_graph_maybe_cyclic; +static bool unix_graph_grouped; + +static void unix_update_graph(struct unix_vertex *vertex) +{ + /* If the receiver socket is not inflight, no cyclic + * reference could be formed. + */ + if (!vertex) + return; + + unix_graph_maybe_cyclic = true; + unix_graph_grouped = false; +} + +static LIST_HEAD(unix_unvisited_vertices); + +enum unix_vertex_index { + UNIX_VERTEX_INDEX_MARK1, + UNIX_VERTEX_INDEX_MARK2, + UNIX_VERTEX_INDEX_START, +}; + +static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1; + +static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) +{ + struct unix_vertex *vertex = edge->predecessor->vertex; + + if (!vertex) { + vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry); + vertex->index = unix_vertex_unvisited_index; + vertex->out_degree = 0; + INIT_LIST_HEAD(&vertex->edges); + INIT_LIST_HEAD(&vertex->scc_entry); + + list_move_tail(&vertex->entry, &unix_unvisited_vertices); + edge->predecessor->vertex = vertex; + } + + vertex->out_degree++; + list_add_tail(&edge->vertex_entry, &vertex->edges); + + unix_update_graph(unix_edge_successor(edge)); +} + +static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge) +{ + struct unix_vertex *vertex = edge->predecessor->vertex; + + unix_update_graph(unix_edge_successor(edge)); + + list_del(&edge->vertex_entry); + vertex->out_degree--; + + if (!vertex->out_degree) { + edge->predecessor->vertex = NULL; + list_move_tail(&vertex->entry, &fpl->vertices); + } +} + +static void unix_free_vertices(struct scm_fp_list *fpl) +{ + struct unix_vertex *vertex, *next_vertex; + + list_for_each_entry_safe(vertex, next_vertex, &fpl->vertices, entry) { + list_del(&vertex->entry); + kfree(vertex); + } +} + DEFINE_SPINLOCK(unix_gc_lock); unsigned int unix_tot_inflight; -static LIST_HEAD(gc_candidates); -static LIST_HEAD(gc_inflight_list); -/* Keep the number of times in flight count for the file - * descriptor if it is for an AF_UNIX socket. 
- */ -void unix_inflight(struct user_struct *user, struct file *filp) +void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver) { - struct unix_sock *u = unix_get_socket(filp); + int i = 0, j = 0; spin_lock(&unix_gc_lock); - if (u) { - if (!u->inflight) { - WARN_ON_ONCE(!list_empty(&u->link)); - list_add_tail(&u->link, &gc_inflight_list); - } else { - WARN_ON_ONCE(list_empty(&u->link)); - } - u->inflight++; + if (!fpl->count_unix) + goto out; - /* Paired with READ_ONCE() in wait_for_unix_gc() */ - WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); - } + do { + struct unix_sock *inflight = unix_get_socket(fpl->fp[j++]); + struct unix_edge *edge; + + if (!inflight) + continue; + + edge = fpl->edges + i++; + edge->predecessor = inflight; + edge->successor = receiver; - WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1); + unix_add_edge(fpl, edge); + } while (i < fpl->count_unix); + + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix); +out: + WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count); spin_unlock(&unix_gc_lock); + + fpl->inflight = true; + + unix_free_vertices(fpl); } -void unix_notinflight(struct user_struct *user, struct file *filp) +void unix_del_edges(struct scm_fp_list *fpl) { - struct unix_sock *u = unix_get_socket(filp); + int i = 0; spin_lock(&unix_gc_lock); - if (u) { - WARN_ON_ONCE(!u->inflight); - WARN_ON_ONCE(list_empty(&u->link)); + if (!fpl->count_unix) + goto out; - u->inflight--; - if (!u->inflight) - list_del_init(&u->link); + do { + struct unix_edge *edge = fpl->edges + i++; - /* Paired with READ_ONCE() in wait_for_unix_gc() */ - WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); - } + unix_del_edge(fpl, edge); + } while (i < fpl->count_unix); - WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1); + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix); +out: + WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count); spin_unlock(&unix_gc_lock); + + fpl->inflight = false; } -static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), - struct sk_buff_head *hitlist) +void unix_update_edges(struct unix_sock *receiver) { - struct sk_buff *skb; - struct sk_buff *next; - - spin_lock(&x->sk_receive_queue.lock); - skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { - /* Do we have file descriptors ? 
*/ - if (UNIXCB(skb).fp) { - bool hit = false; - /* Process the descriptors of this socket */ - int nfd = UNIXCB(skb).fp->count; - struct file **fp = UNIXCB(skb).fp->fp; - - while (nfd--) { - /* Get the socket the fd matches if it indeed does so */ - struct unix_sock *u = unix_get_socket(*fp++); - - /* Ignore non-candidates, they could have been added - * to the queues after starting the garbage collection - */ - if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { - hit = true; - - func(u); - } - } - if (hit && hitlist != NULL) { - __skb_unlink(skb, &x->sk_receive_queue); - __skb_queue_tail(hitlist, skb); - } - } - } - spin_unlock(&x->sk_receive_queue.lock); + spin_lock(&unix_gc_lock); + unix_update_graph(unix_sk(receiver->listener)->vertex); + receiver->listener = NULL; + spin_unlock(&unix_gc_lock); } -static void scan_children(struct sock *x, void (*func)(struct unix_sock *), - struct sk_buff_head *hitlist) +int unix_prepare_fpl(struct scm_fp_list *fpl) { - if (x->sk_state != TCP_LISTEN) { - scan_inflight(x, func, hitlist); - } else { - struct sk_buff *skb; - struct sk_buff *next; - struct unix_sock *u; - LIST_HEAD(embryos); + struct unix_vertex *vertex; + int i; - /* For a listening socket collect the queued embryos - * and perform a scan on them as well. - */ - spin_lock(&x->sk_receive_queue.lock); - skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { - u = unix_sk(skb->sk); + if (!fpl->count_unix) + return 0; - /* An embryo cannot be in-flight, so it's safe - * to use the list link. - */ - WARN_ON_ONCE(!list_empty(&u->link)); - list_add_tail(&u->link, &embryos); - } - spin_unlock(&x->sk_receive_queue.lock); + for (i = 0; i < fpl->count_unix; i++) { + vertex = kmalloc(sizeof(*vertex), GFP_KERNEL); + if (!vertex) + goto err; - while (!list_empty(&embryos)) { - u = list_entry(embryos.next, struct unix_sock, link); - scan_inflight(&u->sk, func, hitlist); - list_del_init(&u->link); - } + list_add(&vertex->entry, &fpl->vertices); } -} -static void dec_inflight(struct unix_sock *usk) -{ - usk->inflight--; + fpl->edges = kvmalloc_array(fpl->count_unix, sizeof(*fpl->edges), + GFP_KERNEL_ACCOUNT); + if (!fpl->edges) + goto err; + + return 0; + +err: + unix_free_vertices(fpl); + return -ENOMEM; } -static void inc_inflight(struct unix_sock *usk) +void unix_destroy_fpl(struct scm_fp_list *fpl) { - usk->inflight++; + if (fpl->inflight) + unix_del_edges(fpl); + + kvfree(fpl->edges); + unix_free_vertices(fpl); } -static void inc_inflight_move_tail(struct unix_sock *u) +static bool unix_vertex_dead(struct unix_vertex *vertex) { - u->inflight++; + struct unix_edge *edge; + struct unix_sock *u; + long total_ref; - /* If this still might be part of a cycle, move it to the end - * of the list, so that it's checked even if it was already - * passed over - */ - if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags)) - list_move_tail(&u->link, &gc_candidates); + list_for_each_entry(edge, &vertex->edges, vertex_entry) { + struct unix_vertex *next_vertex = unix_edge_successor(edge); + + /* The vertex's fd can be received by a non-inflight socket. */ + if (!next_vertex) + return false; + + /* The vertex's fd can be received by an inflight socket in + * another SCC. + */ + if (next_vertex->scc_index != vertex->scc_index) + return false; + } + + /* No receiver exists out of the same SCC. */ + + edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry); + u = edge->predecessor; + total_ref = file_count(u->sk.sk_socket->file); + + /* If not close()d, total_ref > out_degree. 
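+ * e.g. an fd queued in two in-flight skbs gives out_degree == 2;
+ * if user space still holds that fd open, file_count() is 3 and
+ * the vertex must stay alive.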
*/ + if (total_ref != vertex->out_degree) + return false; + + return true; } -static bool gc_in_progress; +enum unix_recv_queue_lock_class { + U_RECVQ_LOCK_NORMAL, + U_RECVQ_LOCK_EMBRYO, +}; -static void __unix_gc(struct work_struct *work) +static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist) { - struct sk_buff_head hitlist; - struct unix_sock *u, *next; - LIST_HEAD(not_cycle_list); - struct list_head cursor; + struct unix_vertex *vertex; - spin_lock(&unix_gc_lock); + list_for_each_entry_reverse(vertex, scc, scc_entry) { + struct sk_buff_head *queue; + struct unix_edge *edge; + struct unix_sock *u; - /* First, select candidates for garbage collection. Only - * in-flight sockets are considered, and from those only ones - * which don't have any external reference. - * - * Holding unix_gc_lock will protect these candidates from - * being detached, and hence from gaining an external - * reference. Since there are no possible receivers, all - * buffers currently on the candidates' queues stay there - * during the garbage collection. - * - * We also know that no new candidate can be added onto the - * receive queues. Other, non candidate sockets _can_ be - * added to queue, so we must make sure only to touch - * candidates. - */ - list_for_each_entry_safe(u, next, &gc_inflight_list, link) { - long total_refs; + edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry); + u = edge->predecessor; + queue = &u->sk.sk_receive_queue; + + spin_lock(&queue->lock); + + if (u->sk.sk_state == TCP_LISTEN) { + struct sk_buff *skb; - total_refs = file_count(u->sk.sk_socket->file); + skb_queue_walk(queue, skb) { + struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue; - WARN_ON_ONCE(!u->inflight); - WARN_ON_ONCE(total_refs < u->inflight); - if (total_refs == u->inflight) { - list_move_tail(&u->link, &gc_candidates); - __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); - __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); + /* listener -> embryo order, the inversion never happens. */ + spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO); + skb_queue_splice_init(embryo_queue, hitlist); + spin_unlock(&embryo_queue->lock); + } + } else { + skb_queue_splice_init(queue, hitlist); + +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif } + + spin_unlock(&queue->lock); } +} - /* Now remove all internal in-flight reference to children of - * the candidates. - */ - list_for_each_entry(u, &gc_candidates, link) - scan_children(&u->sk, dec_inflight, NULL); +static bool unix_scc_cyclic(struct list_head *scc) +{ + struct unix_vertex *vertex; + struct unix_edge *edge; - /* Restore the references for children of all candidates, - * which have remaining references. Do this recursively, so - * only those remain, which form cyclic references. - * - * Use a "cursor" link, to make the list traversal safe, even - * though elements might be moved about. + /* SCC containing multiple vertices ? */ + if (!list_is_singular(scc)) + return true; + + vertex = list_first_entry(scc, typeof(*vertex), scc_entry); + + /* Self-reference or a embryo-listener circle ? 
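+ * i.e. an edge whose unix_edge_successor() resolves back to this
+ * same vertex.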
*/ + list_for_each_entry(edge, &vertex->edges, vertex_entry) { + if (unix_edge_successor(edge) == vertex) + return true; + } + + return false; +} + +static LIST_HEAD(unix_visited_vertices); +static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2; + +static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index, + struct sk_buff_head *hitlist) +{ + LIST_HEAD(vertex_stack); + struct unix_edge *edge; + LIST_HEAD(edge_stack); + +next_vertex: + /* Push vertex to vertex_stack and mark it as on-stack + * (index >= UNIX_VERTEX_INDEX_START). + * The vertex will be popped when finalising SCC later. */ - list_add(&cursor, &gc_candidates); - while (cursor.next != &gc_candidates) { - u = list_entry(cursor.next, struct unix_sock, link); + list_add(&vertex->scc_entry, &vertex_stack); + + vertex->index = *last_index; + vertex->scc_index = *last_index; + (*last_index)++; + + /* Explore neighbour vertices (receivers of the current vertex's fd). */ + list_for_each_entry(edge, &vertex->edges, vertex_entry) { + struct unix_vertex *next_vertex = unix_edge_successor(edge); + + if (!next_vertex) + continue; + + if (next_vertex->index == unix_vertex_unvisited_index) { + /* Iterative deepening depth first search + * + * 1. Push a forward edge to edge_stack and set + * the successor to vertex for the next iteration. + */ + list_add(&edge->stack_entry, &edge_stack); - /* Move cursor to after the current position. */ - list_move(&cursor, &u->link); + vertex = next_vertex; + goto next_vertex; - if (u->inflight) { - list_move_tail(&u->link, ¬_cycle_list); - __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); - scan_children(&u->sk, inc_inflight_move_tail, NULL); + /* 2. Pop the edge directed to the current vertex + * and restore the ancestor for backtracking. + */ +prev_vertex: + edge = list_first_entry(&edge_stack, typeof(*edge), stack_entry); + list_del_init(&edge->stack_entry); + + next_vertex = vertex; + vertex = edge->predecessor->vertex; + + /* If the successor has a smaller scc_index, two vertices + * are in the same SCC, so propagate the smaller scc_index + * to skip SCC finalisation. + */ + vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index); + } else if (next_vertex->index != unix_vertex_grouped_index) { + /* Loop detected by a back/cross edge. + * + * The successor is on vertex_stack, so two vertices are in + * the same SCC. If the successor has a smaller *scc_index*, + * propagate it to skip SCC finalisation. + */ + vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index); + } else { + /* The successor was already grouped as another SCC */ } } - list_del(&cursor); - /* Now gc_candidates contains only garbage. Restore original - * inflight counters for these as well, and remove the skbuffs - * which are creating the cycle(s). - */ - skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) { - scan_children(&u->sk, inc_inflight, &hitlist); + if (vertex->index == vertex->scc_index) { + struct list_head scc; + bool scc_dead = true; -#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - if (u->oob_skb) { - kfree_skb(u->oob_skb); - u->oob_skb = NULL; + /* SCC finalised. + * + * If the scc_index was not updated, all the vertices above on + * vertex_stack are in the same SCC. Group them using scc_entry. + */ + __list_cut_position(&scc, &vertex_stack, &vertex->scc_entry); + + list_for_each_entry_reverse(vertex, &scc, scc_entry) { + /* Don't restart DFS from this vertex in unix_walk_scc(). 
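+ * unix_walk_scc() picks start vertices only from
+ * unix_unvisited_vertices, so moving the entry here is enough.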
*/ + list_move_tail(&vertex->entry, &unix_visited_vertices); + + /* Mark vertex as off-stack. */ + vertex->index = unix_vertex_grouped_index; + + if (scc_dead) + scc_dead = unix_vertex_dead(vertex); } -#endif + + if (scc_dead) + unix_collect_skb(&scc, hitlist); + else if (!unix_graph_maybe_cyclic) + unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); + + list_del(&scc); } - /* not_cycle_list contains those sockets which do not make up a - * cycle. Restore these to the inflight list. + /* Need backtracking ? */ + if (!list_empty(&edge_stack)) + goto prev_vertex; +} + +static void unix_walk_scc(struct sk_buff_head *hitlist) +{ + unsigned long last_index = UNIX_VERTEX_INDEX_START; + + unix_graph_maybe_cyclic = false; + + /* Visit every vertex exactly once. + * __unix_walk_scc() moves visited vertices to unix_visited_vertices. */ - while (!list_empty(¬_cycle_list)) { - u = list_entry(not_cycle_list.next, struct unix_sock, link); - __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags); - list_move_tail(&u->link, &gc_inflight_list); + while (!list_empty(&unix_unvisited_vertices)) { + struct unix_vertex *vertex; + + vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry); + __unix_walk_scc(vertex, &last_index, hitlist); } - spin_unlock(&unix_gc_lock); + list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices); + swap(unix_vertex_unvisited_index, unix_vertex_grouped_index); - /* Here we are. Hitlist is filled. Die. */ - __skb_queue_purge(&hitlist); + unix_graph_grouped = true; +} + +static void unix_walk_scc_fast(struct sk_buff_head *hitlist) +{ + while (!list_empty(&unix_unvisited_vertices)) { + struct unix_vertex *vertex; + struct list_head scc; + bool scc_dead = true; + + vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry); + list_add(&scc, &vertex->scc_entry); + + list_for_each_entry_reverse(vertex, &scc, scc_entry) { + list_move_tail(&vertex->entry, &unix_visited_vertices); + + if (scc_dead) + scc_dead = unix_vertex_dead(vertex); + } + + if (scc_dead) + unix_collect_skb(&scc, hitlist); + + list_del(&scc); + } + + list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices); +} + +static bool gc_in_progress; + +static void __unix_gc(struct work_struct *work) +{ + struct sk_buff_head hitlist; spin_lock(&unix_gc_lock); - /* All candidates should have been detached by now. */ - WARN_ON_ONCE(!list_empty(&gc_candidates)); + if (!unix_graph_maybe_cyclic) { + spin_unlock(&unix_gc_lock); + goto skip_gc; + } - /* Paired with READ_ONCE() in wait_for_unix_gc(). 
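 * (the rewritten function below keeps this pairing: the skip_gc path
 * still ends with WRITE_ONCE(gc_in_progress, false)).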
*/ - WRITE_ONCE(gc_in_progress, false); + __skb_queue_head_init(&hitlist); + + if (unix_graph_grouped) + unix_walk_scc_fast(&hitlist); + else + unix_walk_scc(&hitlist); spin_unlock(&unix_gc_lock); + + __skb_queue_purge(&hitlist); +skip_gc: + WRITE_ONCE(gc_in_progress, false); } static DECLARE_WORK(unix_gc_work, __unix_gc); diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 3e84b31c355a2..ae45d4cfac394 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -19,7 +19,6 @@ static struct ctl_table unix_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { } }; int __net_init unix_sysctl_register(struct net *net) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 1748268e0694f..ee5d306a96d0f 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -120,7 +120,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) if (!skb) break; - virtio_transport_deliver_tap_pkt(skb); reply = virtio_vsock_skb_reply(skb); sgs = vsock->out_sgs; sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb), @@ -170,6 +169,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) break; } + virtio_transport_deliver_tap_pkt(skb); + if (reply) { struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; int val; diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index e9802afa43d0d..643f50874dfe6 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -71,7 +71,6 @@ static struct ctl_table x25_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { }, }; int __init x25_register_sysctl(void) diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 7fdeafc838a7c..b0f542805e6eb 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -38,7 +38,6 @@ static struct ctl_table xfrm_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - {} }; int __net_init xfrm_sysctl_init(struct net *net) @@ -57,10 +56,8 @@ int __net_init xfrm_sysctl_init(struct net *net) table[3].data = &net->xfrm.sysctl_acq_expires; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - table[0].procname = NULL; + if (net->user_ns != &init_user_ns) table_size = 0; - } net->xfrm.sysctl_hdr = register_net_sysctl_sz(net, "net/core", table, table_size); diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 967f1abb0edbd..1474e95dbe4f0 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1143,6 +1143,7 @@ sub dump_struct($$) { $members =~ s/\s*$attribute/ /gi; $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; $members =~ s/\s*__counted_by\s*\([^;]*\)/ /gos; + $members =~ s/\s*__counted_by_(le|be)\s*\([^;]*\)/ /gos; $members =~ s/\s*__packed\s*/ /gos; $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index 9842e85a8c57d..eef7c6324ed48 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -91,6 +91,18 @@ void ynl_sock_destroy(struct ynl_sock *ys); !ynl_dump_obj_is_last(iter); \ iter = ynl_dump_obj_next(iter)) +/** + * ynl_dump_empty() - does the dump have no entries + * @dump: pointer to the dump list, as returned by a dump call + * + * Check if the dump is empty, i.e. contains no objects. + * Dump calls return NULL on error, and terminator element if empty. 
+ */ +static inline bool ynl_dump_empty(void *dump) +{ + return dump == (void *)YNL_LIST_END; +} + int ynl_subscribe(struct ynl_sock *ys, const char *grp_name); int ynl_socket_get_fd(struct ynl_sock *ys); int ynl_ntf_check(struct ynl_sock *ys); diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index e73b027c5624f..82d3c98067aa0 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause from collections import namedtuple +from enum import Enum import functools import os import random @@ -76,6 +77,25 @@ class Netlink: NLMSGERR_ATTR_MISS_TYPE = 5 NLMSGERR_ATTR_MISS_NEST = 6 + # Policy types + NL_POLICY_TYPE_ATTR_TYPE = 1 + NL_POLICY_TYPE_ATTR_MIN_VALUE_S = 2 + NL_POLICY_TYPE_ATTR_MAX_VALUE_S = 3 + NL_POLICY_TYPE_ATTR_MIN_VALUE_U = 4 + NL_POLICY_TYPE_ATTR_MAX_VALUE_U = 5 + NL_POLICY_TYPE_ATTR_MIN_LENGTH = 6 + NL_POLICY_TYPE_ATTR_MAX_LENGTH = 7 + NL_POLICY_TYPE_ATTR_POLICY_IDX = 8 + NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE = 9 + NL_POLICY_TYPE_ATTR_BITFIELD32_MASK = 10 + NL_POLICY_TYPE_ATTR_PAD = 11 + NL_POLICY_TYPE_ATTR_MASK = 12 + + AttrType = Enum('AttrType', ['flag', 'u8', 'u16', 'u32', 'u64', + 's8', 's16', 's32', 's64', + 'binary', 'string', 'nul-string', + 'nested', 'nested-array', + 'bitfield32', 'sint', 'uint']) class NlError(Exception): def __init__(self, nl_msg): @@ -198,6 +218,8 @@ def __init__(self, msg, offset, attr_space=None): self.extack['miss-nest'] = extack.as_scalar('u32') elif extack.type == Netlink.NLMSGERR_ATTR_OFFS: self.extack['bad-attr-offs'] = extack.as_scalar('u32') + elif extack.type == Netlink.NLMSGERR_ATTR_POLICY: + self.extack['policy'] = self._decode_policy(extack.raw) else: if 'unknown' not in self.extack: self.extack['unknown'] = [] @@ -214,6 +236,30 @@ def __init__(self, msg, offset, attr_space=None): desc += f" ({spec['doc']})" self.extack['miss-type'] = desc + def _decode_policy(self, raw): + policy = {} + for attr in NlAttrs(raw): + if attr.type == Netlink.NL_POLICY_TYPE_ATTR_TYPE: + type = attr.as_scalar('u32') + policy['type'] = Netlink.AttrType(type).name + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_VALUE_S: + policy['min-value'] = attr.as_scalar('s64') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_VALUE_S: + policy['max-value'] = attr.as_scalar('s64') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_VALUE_U: + policy['min-value'] = attr.as_scalar('u64') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_VALUE_U: + policy['max-value'] = attr.as_scalar('u64') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_LENGTH: + policy['min-length'] = attr.as_scalar('u32') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_LENGTH: + policy['max-length'] = attr.as_scalar('u32') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: + policy['bitfield32-mask'] = attr.as_scalar('u32') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MASK: + policy['mask'] = attr.as_scalar('u64') + return policy + def cmd(self): return self.nl_type diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c index 591b90e21890c..3e7b29bd55d5d 100644 --- a/tools/net/ynl/samples/netdev.c +++ b/tools/net/ynl/samples/netdev.c @@ -100,6 +100,8 @@ int main(int argc, char **argv) if (!devs) goto err_close; + if (ynl_dump_empty(devs)) + fprintf(stderr, "Error: no devices reported\n"); ynl_dump_foreach(devs, d) netdev_print_device(d, 0); netdev_dev_get_list_free(devs); diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 
927407b3efb3d..657e881d2ea4a 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -82,9 +82,9 @@ def rst_subsubsection(title: str) -> str: return f"{title}\n" + "~" * len(title) -def rst_section(title: str) -> str: +def rst_section(namespace: str, prefix: str, title: str) -> str: """Add a section to the document""" - return f"\n{title}\n" + "=" * len(title) + return f".. _{namespace}-{prefix}-{title}:\n\n{title}\n" + "=" * len(title) def rst_subtitle(title: str) -> str: @@ -102,6 +102,17 @@ def rst_list_inline(list_: List[str], level: int = 0) -> str: return headroom(level) + "[" + ", ".join(inline(i) for i in list_) + "]" +def rst_ref(namespace: str, prefix: str, name: str) -> str: + """Add a hyperlink to the document""" + mappings = {'enum': 'definition', + 'fixed-header': 'definition', + 'nested-attributes': 'attribute-set', + 'struct': 'definition'} + if prefix in mappings: + prefix = mappings[prefix] + return f":ref:`{namespace}-{prefix}-{name}`" + + def rst_header() -> str: """The headers for all the auto generated RST files""" lines = [] @@ -159,20 +170,24 @@ def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str: return "\n".join(lines) -def parse_operations(operations: List[Dict[str, Any]]) -> str: +def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str: """Parse operations block""" preprocessed = ["name", "doc", "title", "do", "dump"] + linkable = ["fixed-header", "attribute-set"] lines = [] for operation in operations: - lines.append(rst_section(operation["name"])) + lines.append(rst_section(namespace, 'operation', operation["name"])) lines.append(rst_paragraph(sanitize(operation["doc"])) + "\n") for key in operation.keys(): if key in preprocessed: # Skip the special fields continue - lines.append(rst_fields(key, operation[key], 0)) + value = operation[key] + if key in linkable: + value = rst_ref(namespace, key, value) + lines.append(rst_fields(key, value, 0)) if "do" in operation: lines.append(rst_paragraph(":do:", 0)) @@ -212,14 +227,14 @@ def parse_entries(entries: List[Dict[str, Any]], level: int) -> str: return "\n".join(lines) -def parse_definitions(defs: Dict[str, Any]) -> str: +def parse_definitions(defs: Dict[str, Any], namespace: str) -> str: """Parse definitions section""" preprocessed = ["name", "entries", "members"] ignored = ["render-max"] # This is not printed lines = [] for definition in defs: - lines.append(rst_section(definition["name"])) + lines.append(rst_section(namespace, 'definition', definition["name"])) for k in definition.keys(): if k in preprocessed + ignored: continue @@ -237,14 +252,15 @@ def parse_definitions(defs: Dict[str, Any]) -> str: return "\n".join(lines) -def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: +def parse_attr_sets(entries: List[Dict[str, Any]], namespace: str) -> str: """Parse attribute from attribute-set""" preprocessed = ["name", "type"] + linkable = ["enum", "nested-attributes", "struct", "sub-message"] ignored = ["checks"] lines = [] for entry in entries: - lines.append(rst_section(entry["name"])) + lines.append(rst_section(namespace, 'attribute-set', entry["name"])) for attr in entry["attributes"]: type_ = attr.get("type") attr_line = attr["name"] @@ -257,25 +273,31 @@ def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: for k in attr.keys(): if k in preprocessed + ignored: continue - lines.append(rst_fields(k, sanitize(attr[k]), 0)) + if k in linkable: + value = rst_ref(namespace, k, attr[k]) + else: + value = sanitize(attr[k]) + 
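The hunks above establish an invariant worth spelling out: rst_section() now emits a ".. _{namespace}-{prefix}-{title}:" label for every section, and rst_ref() builds ":ref:" targets against those labels, remapping attribute keys such as "enum" or "struct" that are rendered under "Definitions". A standalone sketch of that pairing, with both helpers condensed from the diff and "netdev"/"xdp-act" used purely as illustrative names:

# Standalone sketch; "netdev" / "xdp-act" are illustrative, not from this patch.
def rst_section(namespace: str, prefix: str, title: str) -> str:
    return f".. _{namespace}-{prefix}-{title}:\n\n{title}\n" + "=" * len(title)

def rst_ref(namespace: str, prefix: str, name: str) -> str:
    # Keys like 'enum' and 'struct' point at entries rendered as
    # definitions, so remap them before building the label name.
    mappings = {'enum': 'definition',
                'fixed-header': 'definition',
                'nested-attributes': 'attribute-set',
                'struct': 'definition'}
    return f":ref:`{namespace}-{mappings.get(prefix, prefix)}-{name}`"

section = rst_section("netdev", "definition", "xdp-act")
assert section.startswith(".. _netdev-definition-xdp-act:")
assert rst_ref("netdev", "enum", "xdp-act") == ":ref:`netdev-definition-xdp-act`"

If the label and the reference ever disagree (say, a new linkable key is added to rst_ref() without a matching section), Sphinx flags an undefined label at build time.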
lines.append(rst_fields(k, value, 0)) lines.append("\n") return "\n".join(lines) -def parse_sub_messages(entries: List[Dict[str, Any]]) -> str: +def parse_sub_messages(entries: List[Dict[str, Any]], namespace: str) -> str: """Parse sub-message definitions""" lines = [] for entry in entries: - lines.append(rst_section(entry["name"])) + lines.append(rst_section(namespace, 'sub-message', entry["name"])) for fmt in entry["formats"]: value = fmt["value"] lines.append(rst_bullet(bold(value))) for attr in ['fixed-header', 'attribute-set']: if attr in fmt: - lines.append(rst_fields(attr, fmt[attr], 1)) + lines.append(rst_fields(attr, + rst_ref(namespace, attr, fmt[attr]), + 1)) lines.append("\n") return "\n".join(lines) @@ -289,9 +311,11 @@ def parse_yaml(obj: Dict[str, Any]) -> str: lines.append(rst_header()) - title = f"Family ``{obj['name']}`` netlink specification" + family = obj['name'] + + title = f"Family ``{family}`` netlink specification" lines.append(rst_title(title)) - lines.append(rst_paragraph(".. contents::\n")) + lines.append(rst_paragraph(".. contents:: :depth: 3\n")) if "doc" in obj: lines.append(rst_subtitle("Summary")) @@ -300,7 +324,7 @@ def parse_yaml(obj: Dict[str, Any]) -> str: # Operations if "operations" in obj: lines.append(rst_subtitle("Operations")) - lines.append(parse_operations(obj["operations"]["list"])) + lines.append(parse_operations(obj["operations"]["list"], family)) # Multicast groups if "mcast-groups" in obj: @@ -310,17 +334,17 @@ def parse_yaml(obj: Dict[str, Any]) -> str: # Definitions if "definitions" in obj: lines.append(rst_subtitle("Definitions")) - lines.append(parse_definitions(obj["definitions"])) + lines.append(parse_definitions(obj["definitions"], family)) # Attributes set if "attribute-sets" in obj: lines.append(rst_subtitle("Attribute sets")) - lines.append(parse_attr_sets(obj["attribute-sets"])) + lines.append(parse_attr_sets(obj["attribute-sets"], family)) # Sub-messages if "sub-messages" in obj: lines.append(rst_subtitle("Sub-messages")) - lines.append(parse_sub_messages(obj["sub-messages"])) + lines.append(parse_sub_messages(obj["sub-messages"], family)) return "\n".join(lines) diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index c5914f4e75e1b..173a006ddbb26 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -39,6 +39,7 @@ CONFIG_DAMON=y CONFIG_DAMON_VADDR=y CONFIG_DAMON_PADDR=y CONFIG_DEBUG_FS=y +CONFIG_DAMON_DBGFS_DEPRECATED=y CONFIG_DAMON_DBGFS=y CONFIG_REGMAP_BUILD=y diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 2f9d378edec33..d996a0ab0765e 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -31,6 +31,7 @@ reuseport_dualstack rxtimestamp sctp_hello scm_pidfd +scm_rights sk_bind_sendto_listen sk_connect_zero_addr socket diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 221c387a7d7f6..3b83c797650d4 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd +TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd scm_rights include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c new file mode 100644 index 
0000000000000..bab606c9f1eb0 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ +#define _GNU_SOURCE +#include <sched.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(scm_rights) +{ + int fd[16]; +}; + +FIXTURE_VARIANT(scm_rights) +{ + char name[16]; + int type; + int flags; + bool test_listener; +}; + +FIXTURE_VARIANT_ADD(scm_rights, dgram) +{ + .name = "UNIX ", + .type = SOCK_DGRAM, + .flags = 0, + .test_listener = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_oob) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = true, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = true, +}; + +static int count_sockets(struct __test_metadata *_metadata, + const FIXTURE_VARIANT(scm_rights) *variant) +{ + int sockets = -1, len, ret; + char *line = NULL; + size_t unused; + FILE *f; + + f = fopen("/proc/net/protocols", "r"); + ASSERT_NE(NULL, f); + + len = strlen(variant->name); + + while (getline(&line, &unused, f) != -1) { + int unused2; + + if (strncmp(line, variant->name, len)) + continue; + + ret = sscanf(line + len, "%d %d", &unused2, &sockets); + ASSERT_EQ(2, ret); + + break; + } + + free(line); + + ret = fclose(f); + ASSERT_EQ(0, ret); + + return sockets; +} + +FIXTURE_SETUP(scm_rights) +{ + int ret; + + ret = unshare(CLONE_NEWNET); + ASSERT_EQ(0, ret); + + ret = count_sockets(_metadata, variant); + ASSERT_EQ(0, ret); +} + +FIXTURE_TEARDOWN(scm_rights) +{ + int ret; + + sleep(1); + + ret = count_sockets(_metadata, variant); + ASSERT_EQ(0, ret); +} + +static void create_listeners(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + int n) +{ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + socklen_t addrlen; + int i, ret; + + for (i = 0; i < n * 2; i += 2) { + self->fd[i] = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LE(0, self->fd[i]); + + addrlen = sizeof(addr.sun_family); + ret = bind(self->fd[i], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(0, ret); + + ret = listen(self->fd[i], -1); + ASSERT_EQ(0, ret); + + addrlen = sizeof(addr); + ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen); + ASSERT_EQ(0, ret); + + self->fd[i + 1] = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LE(0, self->fd[i + 1]); + + ret = connect(self->fd[i + 1], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(0, ret); + } +} + +static void create_socketpairs(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, + int n) +{ + int i, ret; + + ASSERT_GE(sizeof(self->fd) / sizeof(int), n); + + for (i = 0; i < n * 2; i += 2) { + ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i); + ASSERT_EQ(0, ret); + } +} + +static void __create_sockets(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, + int n) +{ + if (variant->test_listener) + create_listeners(_metadata, self, n); + else + create_socketpairs(_metadata, self, variant, n); +} + +static void
__close_sockets(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + int n) +{ + int i, ret; + + ASSERT_GE(sizeof(self->fd) / sizeof(int), n); + + for (i = 0; i < n * 2; i++) { + ret = close(self->fd[i]); + ASSERT_EQ(0, ret); + } +} + +void __send_fd(struct __test_metadata *_metadata, + const FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, + int inflight, int receiver) +{ +#define MSG "nop" +#define MSGLEN 3 + struct { + struct cmsghdr cmsghdr; + int fd[2]; + } cmsg = { + .cmsghdr = { + .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)), + .cmsg_level = SOL_SOCKET, + .cmsg_type = SCM_RIGHTS, + }, + .fd = { + self->fd[inflight * 2], + self->fd[inflight * 2], + }, + }; + struct iovec iov = { + .iov_base = MSG, + .iov_len = MSGLEN, + }; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &cmsg, + .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)), + }; + int ret; + + ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); + ASSERT_EQ(MSGLEN, ret); +} + +#define create_sockets(n) \ + __create_sockets(_metadata, self, variant, n) +#define close_sockets(n) \ + __close_sockets(_metadata, self, n) +#define send_fd(inflight, receiver) \ + __send_fd(_metadata, self, variant, inflight, receiver) + +TEST_F(scm_rights, self_ref) +{ + create_sockets(2); + + send_fd(0, 0); + + send_fd(1, 1); + + close_sockets(2); +} + +TEST_F(scm_rights, triangle) +{ + create_sockets(6); + + send_fd(0, 1); + send_fd(1, 2); + send_fd(2, 0); + + send_fd(3, 4); + send_fd(4, 5); + send_fd(5, 3); + + close_sockets(6); +} + +TEST_F(scm_rights, cross_edge) +{ + create_sockets(8); + + send_fd(0, 1); + send_fd(1, 2); + send_fd(2, 0); + send_fd(1, 3); + send_fd(3, 2); + + send_fd(4, 5); + send_fd(5, 6); + send_fd(6, 4); + send_fd(5, 7); + send_fd(7, 6); + + close_sockets(8); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c index a2662348cdb1a..b7b54d646b937 100644 --- a/tools/testing/selftests/net/bind_wildcard.c +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -6,7 +6,9 @@ #include "../kselftest_harness.h" -struct in6_addr in6addr_v4mapped_any = { +static const __u32 in4addr_any = INADDR_ANY; +static const __u32 in4addr_loopback = INADDR_LOOPBACK; +static const struct in6_addr in6addr_v4mapped_any = { .s6_addr = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -14,8 +16,7 @@ struct in6_addr in6addr_v4mapped_any = { 0, 0, 0, 0 } }; - -struct in6_addr in6addr_v4mapped_loopback = { +static const struct in6_addr in6addr_v4mapped_loopback = { .s6_addr = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -24,137 +25,785 @@ struct in6_addr in6addr_v4mapped_loopback = { } }; +#define NR_SOCKETS 8 + FIXTURE(bind_wildcard) { - struct sockaddr_in addr4; - struct sockaddr_in6 addr6; + int fd[NR_SOCKETS]; + socklen_t addrlen[NR_SOCKETS]; + union { + struct sockaddr addr; + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + } addr[NR_SOCKETS]; }; FIXTURE_VARIANT(bind_wildcard) { - const __u32 addr4_const; - const struct in6_addr *addr6_const; - int expected_errno; + sa_family_t family[2]; + const void *addr[2]; + bool ipv6_only[2]; + + /* 6 bind() calls below follow two bind() for the defined 2 addresses: + * + * 0.0.0.0 + * 127.0.0.1 + * :: + * ::1 + * ::ffff:0.0.0.0 + * ::ffff:127.0.0.1 + */ + int expected_errno[NR_SOCKETS]; + int expected_reuse_errno[NR_SOCKETS]; +}; + +/* (IPv4, IPv4) */ +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v4_local) +{ + .family = {AF_INET, AF_INET}, + .addr = 
{&in4addr_any, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v4_any) +{ + .family = {AF_INET, AF_INET}, + .addr = {&in4addr_loopback, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; +/* (IPv4, IPv6) */ FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any_only) +{ + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_loopback, - .expected_errno = 0, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_v4mapped_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_v4mapped_loopback, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any_only) +{ + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + 
.expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_loopback, - .expected_errno = 0, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_v4mapped_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_v4mapped_loopback, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +/* (IPv6, IPv4) */ +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_any}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_loopback}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_loopback, &in4addr_any}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_loopback, &in4addr_loopback}, + 
.expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_any, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_any, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_loopback, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_loopback, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +/* (IPv6, IPv6) */ +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .ipv6_only = {true, false}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .ipv6_only = {true, true}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + 
EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_loopback}, + .ipv6_only = {true, false}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_any}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_loopback}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_v4mapped_any}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_v4mapped_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + 
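To make the tables above concrete: the two leading entries of expected_errno / expected_reuse_errno correspond to the variant's own two bind() calls, and the remaining six to the fixed follow-up binds listed in the fixture comment (0.0.0.0, 127.0.0.1, ::, ::1, ::ffff:0.0.0.0, ::ffff:127.0.0.1). A standalone Python sketch, illustrative only and not part of the patch, of the v6_any_v4_any row, where binding 0.0.0.0 on a port already bound by :: fails unless both sockets set SO_REUSEADDR (assumes the default net.ipv6.bindv6only=0):

import errno
import socket

def try_bind(family, addr, port, reuseaddr=False):
    s = socket.socket(family, socket.SOCK_STREAM)
    if reuseaddr:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    try:
        s.bind((addr, port))
        return s, 0
    except OSError as e:
        return s, e.errno

for reuse in (False, True):
    s1, err1 = try_bind(socket.AF_INET6, "::", 0, reuse)
    port = s1.getsockname()[1]          # kernel-assigned ephemeral port
    s2, err2 = try_bind(socket.AF_INET, "0.0.0.0", port, reuse)
    assert err1 == 0
    assert err2 == (0 if reuse else errno.EADDRINUSE)
    s1.close()
    s2.close()

SO_REUSEADDR only relaxes the conflict while the existing socket is not yet listening, which is exactly the state these selftests exercise.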
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +static void setup_addr(FIXTURE_DATA(bind_wildcard) *self, int i, + int family, const void *addr_const) +{ + if (family == AF_INET) { + struct sockaddr_in *addr4 = &self->addr[i].addr4; + const __u32 *addr4_const = addr_const; + + addr4->sin_family = AF_INET; + addr4->sin_port = htons(0); + addr4->sin_addr.s_addr = htonl(*addr4_const); + + self->addrlen[i] = sizeof(struct sockaddr_in); + } else { + struct sockaddr_in6 *addr6 = 
&self->addr[i].addr6; + const struct in6_addr *addr6_const = addr_const; + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(0); + addr6->sin6_addr = *addr6_const; + + self->addrlen[i] = sizeof(struct sockaddr_in6); + } +} + FIXTURE_SETUP(bind_wildcard) { - self->addr4.sin_family = AF_INET; - self->addr4.sin_port = htons(0); - self->addr4.sin_addr.s_addr = htonl(variant->addr4_const); + setup_addr(self, 0, variant->family[0], variant->addr[0]); + setup_addr(self, 1, variant->family[1], variant->addr[1]); + + setup_addr(self, 2, AF_INET, &in4addr_any); + setup_addr(self, 3, AF_INET, &in4addr_loopback); - self->addr6.sin6_family = AF_INET6; - self->addr6.sin6_port = htons(0); - self->addr6.sin6_addr = *variant->addr6_const; + setup_addr(self, 4, AF_INET6, &in6addr_any); + setup_addr(self, 5, AF_INET6, &in6addr_loopback); + setup_addr(self, 6, AF_INET6, &in6addr_v4mapped_any); + setup_addr(self, 7, AF_INET6, &in6addr_v4mapped_loopback); } FIXTURE_TEARDOWN(bind_wildcard) { + int i; + + for (i = 0; i < NR_SOCKETS; i++) + close(self->fd[i]); } -void bind_sockets(struct __test_metadata *_metadata, - FIXTURE_DATA(bind_wildcard) *self, - int expected_errno, - struct sockaddr *addr1, socklen_t addrlen1, - struct sockaddr *addr2, socklen_t addrlen2) +void bind_socket(struct __test_metadata *_metadata, + FIXTURE_DATA(bind_wildcard) *self, + const FIXTURE_VARIANT(bind_wildcard) *variant, + int i, int reuse) { - int fd[2]; int ret; - fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0); - ASSERT_GT(fd[0], 0); + self->fd[i] = socket(self->addr[i].addr.sa_family, SOCK_STREAM, 0); + ASSERT_GT(self->fd[i], 0); - ret = bind(fd[0], addr1, addrlen1); - ASSERT_EQ(ret, 0); + if (i < 2 && variant->ipv6_only[i]) { + ret = setsockopt(self->fd[i], SOL_IPV6, IPV6_V6ONLY, &(int){1}, sizeof(int)); + ASSERT_EQ(ret, 0); + } - ret = getsockname(fd[0], addr1, &addrlen1); - ASSERT_EQ(ret, 0); + if (i < 2 && reuse) { + ret = setsockopt(self->fd[i], SOL_SOCKET, reuse, &(int){1}, sizeof(int)); + ASSERT_EQ(ret, 0); + } - ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port; + self->addr[i].addr4.sin_port = self->addr[0].addr4.sin_port; - fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0); - ASSERT_GT(fd[1], 0); + ret = bind(self->fd[i], &self->addr[i].addr, self->addrlen[i]); - ret = bind(fd[1], addr2, addrlen2); - if (expected_errno) { - ASSERT_EQ(ret, -1); - ASSERT_EQ(errno, expected_errno); + if (reuse) { + if (variant->expected_reuse_errno[i]) { + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, variant->expected_reuse_errno[i]); + } else { + ASSERT_EQ(ret, 0); + } } else { + if (variant->expected_errno[i]) { + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, variant->expected_errno[i]); + } else { + ASSERT_EQ(ret, 0); + } + } + + if (i == 0) { + ret = getsockname(self->fd[0], &self->addr[0].addr, &self->addrlen[0]); ASSERT_EQ(ret, 0); } +} - close(fd[1]); - close(fd[0]); +TEST_F(bind_wildcard, plain) +{ + int i; + + for (i = 0; i < NR_SOCKETS; i++) + bind_socket(_metadata, self, variant, i, 0); } -TEST_F(bind_wildcard, v4_v6) +TEST_F(bind_wildcard, reuseaddr) { - bind_sockets(_metadata, self, variant->expected_errno, - (struct sockaddr *)&self->addr4, sizeof(self->addr4), - (struct sockaddr *)&self->addr6, sizeof(self->addr6)); + int i; + + for (i = 0; i < NR_SOCKETS; i++) + bind_socket(_metadata, self, variant, i, SO_REUSEADDR); } -TEST_F(bind_wildcard, v6_v4) +TEST_F(bind_wildcard, reuseport) { - bind_sockets(_metadata, self, variant->expected_errno, - (struct sockaddr *)&self->addr6, sizeof(self->addr6), - 
(struct sockaddr *)&self->addr4, sizeof(self->addr4)); + int i; + + for (i = 0; i < NR_SOCKETS; i++) + bind_socket(_metadata, self, variant, i, SO_REUSEPORT); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings index e7b9417537fbc..ad6a1699dd1bd 100644 --- a/tools/testing/selftests/net/forwarding/settings +++ b/tools/testing/selftests/net/forwarding/settings @@ -1 +1 @@ -timeout=0 +timeout=10800 diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 4c42485548262..4131f3263a482 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -383,12 +383,14 @@ do_transfer() local stat_cookierx_last local stat_csum_err_s local stat_csum_err_c + local stat_tcpfb_last_l stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -457,11 +459,13 @@ do_transfer() local stat_cookietx_now local stat_cookierx_now local stat_ooo_now + local stat_tcpfb_now_l stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -508,6 +512,11 @@ do_transfer() fi fi + if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + mptcp_lib_pr_fail "unexpected fallback to TCP" + rets=1 + fi + if [ $cookies -eq 2 ];then if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then extra+=" WARN: CookieSent: did not advance" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 5e9211e898256..e4403236f6554 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -729,7 +729,7 @@ pm_nl_check_endpoint() [ -n "$_flags" ]; flags="flags $_flags" shift elif [ $1 = "dev" ]; then - [ -n "$2" ]; dev="dev $1" + [ -n "$2" ]; dev="dev $2" shift elif [ $1 = "id" ]; then _id=$2 @@ -3610,6 +3610,8 @@ endpoint_tests() local tests_pid=$! 
wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 chk_subflow_nr "before delete" 2 chk_mptcp_info subflows 1 subflows 1 diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c index 7c5b12664b03b..bfb07dc495186 100644 --- a/tools/testing/selftests/net/reuseaddr_conflict.c +++ b/tools/testing/selftests/net/reuseaddr_conflict.c @@ -109,6 +109,6 @@ int main(void) fd1 = open_port(0, 1); if (fd1 >= 0) error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6"); - fprintf(stderr, "Success"); + fprintf(stderr, "Success\n"); return 0; } diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 380cb15e942e4..83ed987cff340 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -244,7 +244,7 @@ for family in 4 6; do create_vxlan_pair ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on - run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 + run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10 cleanup # use NAT to circumvent GRO FWD check @@ -258,13 +258,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null - run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST - cleanup - - create_vxlan_pair - run_bench "UDP tunnel fwd perf" $OL_NET$DST - ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on - run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST cleanup done
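Closing the loop on the af_unix changes: the scm_rights selftest added above builds exactly the reference-graph shapes (self-loops, triangles, cross edges) that the reworked __unix_walk_scc() must partition into SCCs, with dead SCCs feeding unix_collect_skb() and the hitlist. For reference, a compact recursive rendering of the same Tarjan-style grouping; this is an illustrative sketch only, since the kernel version is iterative (explicit edge_stack and goto-based backtracking) and additionally tracks whether each finalised SCC is dead:

def walk_scc(graph):
    """graph: vertex -> list of successors; returns a list of SCCs."""
    index, scc_index = {}, {}
    stack, on_stack = [], set()
    sccs = []

    def dfs(v):
        index[v] = scc_index[v] = len(index)
        stack.append(v)
        on_stack.add(v)
        for w in graph[v]:
            if w not in index:
                dfs(w)
                scc_index[v] = min(scc_index[v], scc_index[w])
            elif w in on_stack:
                # Back/cross edge into the stack: same SCC, propagate.
                scc_index[v] = min(scc_index[v], scc_index[w])
        if index[v] == scc_index[v]:
            # v is the root of a finalised SCC: pop the group off.
            scc = []
            while True:
                w = stack.pop()
                on_stack.remove(w)
                scc.append(w)
                if w == v:
                    break
            sccs.append(scc)

    for v in graph:
        if v not in index:
            dfs(v)
    return sccs

# The 'triangle' selftest shape: A->B->C->A is one SCC, D stands alone.
print(walk_scc({'A': ['B'], 'B': ['C'], 'C': ['A'], 'D': []}))
# [['C', 'B', 'A'], ['D']]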