From 84a58e60038fa0366006977dba85eae16b2e3d78 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 21 Aug 2023 16:05:54 -0700 Subject: [PATCH 01/24] net/mlx5: Dynamic cyclecounter shift calculation for PTP free running clock Use a dynamic calculation to determine the shift value for the internal timer cyclecounter that will lead to the highest precision frequency adjustments. Previously used a constant for the shift value assuming all devices supported by the driver had a nominal frequency of 1GHz. However, there are devices that operate at different frequencies. The previous shift value constant would break the PHC functionality for those devices. Reported-by: Vadim Fedorenko Closes: https://lore.kernel.org/netdev/20230815151507.3028503-1-vadfed@meta.com/ Fixes: 6a4010927562 ("net/mlx5: Update cyclecounter shift value to improve ptp free running mode precision") Signed-off-by: Rahul Rameshbabu Tested-by: Vadim Fedorenko Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Acked-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230821230554.236210-1-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/lib/clock.c | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 377372f0578ae..aa29f09e83564 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -32,16 +32,13 @@ #include #include +#include #include #include #include "lib/eq.h" #include "en.h" #include "clock.h" -enum { - MLX5_CYCLES_SHIFT = 31 -}; - enum { MLX5_PIN_MODE_IN = 0x0, MLX5_PIN_MODE_OUT = 0x1, @@ -93,6 +90,31 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); } +static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) +{ + /* Optimal shift constant leads to corrections above just 1 scaled ppm. + * + * Two sets of equations are needed to derive the optimal shift + * constant for the cyclecounter. + * + * dev_freq_khz * 1000 / 2^shift_constant = 1 scaled_ppm + * ppb = scaled_ppm * 1000 / 2^16 + * + * Using the two equations together + * + * dev_freq_khz * 1000 / 1 scaled_ppm = 2^shift_constant + * dev_freq_khz * 2^16 / 1 ppb = 2^shift_constant + * dev_freq_khz = 2^(shift_constant - 16) + * + * then yields + * + * shift_constant = ilog2(dev_freq_khz) + 16 + */ + + return min(ilog2(dev_freq_khz) + 16, + ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz)); +} + static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); @@ -909,7 +931,7 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz); timer->cycles.read = read_internal_timer; - timer->cycles.shift = MLX5_CYCLES_SHIFT; + timer->cycles.shift = mlx5_ptp_shift_constant(dev_freq); timer->cycles.mult = clocksource_khz2mult(dev_freq, timer->cycles.shift); timer->nominal_c_mult = timer->cycles.mult; From a4f39c9f14a634e4cd35fcd338c239d11fcc73fc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 23 Aug 2023 15:41:02 +0200 Subject: [PATCH 02/24] net: handle ARPHRD_PPP in dev_is_mac_header_xmit() The goal is to support a bpf_redirect() from an ethernet device (ingress) to a ppp device (egress). The l2 header is added automatically by the ppp driver, thus the ethernet header should be removed. CC: stable@vger.kernel.org Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper") Signed-off-by: Nicolas Dichtel Tested-by: Siwar Zitouni Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller --- include/linux/if_arp.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 1ed52441972f9..10a1e81434cb9 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -53,6 +53,10 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_NONE: case ARPHRD_RAWIP: case ARPHRD_PIMREG: + /* PPP adds its l2 header automatically in ppp_start_xmit(). + * This makes it look like an l3 device to __bpf_redirect() and tcf_mirred_init(). + */ + case ARPHRD_PPP: return false; default: return true; From 0aacec49c29e7c5b1487e859b0c0a42388c34092 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 23 Aug 2023 08:18:14 -0700 Subject: [PATCH 03/24] ice: avoid executing commands on other ports when driving sync The ice hardware has a synchronization mechanism used to drive the simultaneous application of commands on both PHY ports and the source timer in the MAC. When issuing a sync via ice_ptp_exec_tmr_cmd(), the hardware will simultaneously apply the commands programmed for the main timer and each PHY port. Neither the main timer command register, nor the PHY port command registers auto clear on command execution. During the execution of a timer command intended for a single port on E822 devices, such as those used to configure a PHY during link up, the driver is not correctly clearing the previous commands. This results in unintentionally executing the last programmed command on the main timer and other PHY ports whenever performing reconfiguration on E822 ports after link up. This results in unintended side effects on other timers, depending on what command was previously programmed. To fix this, the driver must ensure that the main timer and all other PHY ports are properly initialized to perform no action. The enumeration for timer commands does not include an enumeration value for doing nothing. Introduce ICE_PTP_NOP for this purpose. When writing a timer command to hardware, leave the command bits set to zero which indicates that no operation should be performed on that port. Modify ice_ptp_one_port_cmd() to always initialize all ports. For all ports other than the one being configured, write their timer command register to ICE_PTP_NOP. This ensures that no side effect happens on the timer command. To fix this for the PHY ports, modify ice_ptp_one_port_cmd() to always initialize all other ports to ICE_PTP_NOP. This ensures that no side effects happen on the other ports. Call ice_ptp_src_cmd() with a command value if ICE_PTP_NOP in ice_sync_phy_timer_e822() and ice_start_phy_timer_e822(). With both of these changes, the driver should no longer execute a stale command on the main timer or another PHY port when reconfiguring one of the PHY ports after link up. Fixes: 3a7496234d17 ("ice: implement basic E822 PTP support") Signed-off-by: Siddaraju DH Signed-off-by: Jacob Keller Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 55 +++++++++++++++++++-- drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 3 +- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index a38614d21ea8f..de1d83300481d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -131,6 +131,8 @@ static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) case READ_TIME: cmd_val |= GLTSYN_CMD_READ_TIME; break; + case ICE_PTP_NOP: + break; } wr32(hw, GLTSYN_CMD, cmd_val); @@ -1226,18 +1228,18 @@ ice_ptp_read_port_capture(struct ice_hw *hw, u8 port, u64 *tx_ts, u64 *rx_ts) } /** - * ice_ptp_one_port_cmd - Prepare a single PHY port for a timer command + * ice_ptp_write_port_cmd_e822 - Prepare a single PHY port for a timer command * @hw: pointer to HW struct * @port: Port to which cmd has to be sent * @cmd: Command to be sent to the port * * Prepare the requested port for an upcoming timer sync command. * - * Note there is no equivalent of this operation on E810, as that device - * always handles all external PHYs internally. + * Do not use this function directly. If you want to configure exactly one + * port, use ice_ptp_one_port_cmd() instead. */ static int -ice_ptp_one_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) +ice_ptp_write_port_cmd_e822(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) { u32 cmd_val, val; u8 tmr_idx; @@ -1261,6 +1263,8 @@ ice_ptp_one_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) case ADJ_TIME_AT_TIME: cmd_val |= PHY_CMD_ADJ_TIME_AT_TIME; break; + case ICE_PTP_NOP: + break; } /* Tx case */ @@ -1306,6 +1310,39 @@ ice_ptp_one_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) return 0; } +/** + * ice_ptp_one_port_cmd - Prepare one port for a timer command + * @hw: pointer to the HW struct + * @configured_port: the port to configure with configured_cmd + * @configured_cmd: timer command to prepare on the configured_port + * + * Prepare the configured_port for the configured_cmd, and prepare all other + * ports for ICE_PTP_NOP. This causes the configured_port to execute the + * desired command while all other ports perform no operation. + */ +static int +ice_ptp_one_port_cmd(struct ice_hw *hw, u8 configured_port, + enum ice_ptp_tmr_cmd configured_cmd) +{ + u8 port; + + for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) { + enum ice_ptp_tmr_cmd cmd; + int err; + + if (port == configured_port) + cmd = configured_cmd; + else + cmd = ICE_PTP_NOP; + + err = ice_ptp_write_port_cmd_e822(hw, port, cmd); + if (err) + return err; + } + + return 0; +} + /** * ice_ptp_port_cmd_e822 - Prepare all ports for a timer command * @hw: pointer to the HW struct @@ -1322,7 +1359,7 @@ ice_ptp_port_cmd_e822(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) { int err; - err = ice_ptp_one_port_cmd(hw, port, cmd); + err = ice_ptp_write_port_cmd_e822(hw, port, cmd); if (err) return err; } @@ -2252,6 +2289,9 @@ static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port) if (err) goto err_unlock; + /* Do not perform any action on the main timer */ + ice_ptp_src_cmd(hw, ICE_PTP_NOP); + /* Issue the sync to activate the time adjustment */ ice_ptp_exec_tmr_cmd(hw); @@ -2372,6 +2412,9 @@ int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port) if (err) return err; + /* Do not perform any action on the main timer */ + ice_ptp_src_cmd(hw, ICE_PTP_NOP); + ice_ptp_exec_tmr_cmd(hw); err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val); @@ -2847,6 +2890,8 @@ static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) case ADJ_TIME_AT_TIME: cmd_val = GLTSYN_CMD_ADJ_INIT_TIME; break; + case ICE_PTP_NOP: + return 0; } /* Read, modify, write */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 3b68cb91bd819..096685237ca61 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -9,7 +9,8 @@ enum ice_ptp_tmr_cmd { INIT_INCVAL, ADJ_TIME, ADJ_TIME_AT_TIME, - READ_TIME + READ_TIME, + ICE_PTP_NOP, }; enum ice_ptp_serdes { From 49fa4b0d06705a24a81bb8be6eb175059b77f0a7 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Thu, 24 Aug 2023 08:33:01 +0530 Subject: [PATCH 04/24] octeontx2-pf: fix page_pool creation fail for rings > 32k octeontx2 driver calls page_pool_create() during driver probe() and fails if queue size > 32k. Page pool infra uses these buffers as shock absorbers for burst traffic. These pages are pinned down over time as working sets varies, due to the recycling nature of page pool, given page pool (currently) don't have a shrinker mechanism, the pages remain pinned down in ptr_ring. Instead of clamping page_pool size to 32k at most, limit it even more to 2k to avoid wasting memory. This have been tested on octeontx2 CN10KA hardware. TCP and UDP tests using iperf shows no performance regressions. Fixes: b2e3406a38f0 ("octeontx2-pf: Add support for page pool") Suggested-by: Alexander Lobakin Reviewed-by: Sunil Goutham Signed-off-by: Ratheesh Kannoth Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 2 +- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 77c8f650f7ac1..3e1c70c746227 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1432,7 +1432,7 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, } pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP; - pp_params.pool_size = numptrs; + pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs); pp_params.nid = NUMA_NO_NODE; pp_params.dev = pfvf->dev; pp_params.dma_dir = DMA_FROM_DEVICE; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index b5d689eeff80b..9e3bfbe5c4809 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -23,6 +23,8 @@ #define OTX2_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN) #define OTX2_MIN_MTU 60 +#define OTX2_PAGE_POOL_SZ 2048 + #define OTX2_MAX_GSO_SEGS 255 #define OTX2_MAX_FRAGS_IN_SQE 9 From 786c96e92fb9e854cb8b0cb7399bb2fb28e15c4b Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Thu, 24 Aug 2023 14:43:36 +0800 Subject: [PATCH 05/24] net: arcnet: Do not call kfree_skb() under local_irq_disable() It is not allowed to call kfree_skb() from hardware interrupt context or with hardware interrupts being disabled. So replace kfree_skb() with dev_kfree_skb_irq() under local_irq_disable(). Compile tested only. Fixes: 05fcd31cc472 ("arcnet: add err_skb package for package status feedback") Signed-off-by: Jinjie Ruan Signed-off-by: David S. Miller --- drivers/net/arcnet/arcnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 99265667538c3..d9e052c49ba1a 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -464,7 +464,7 @@ static void arcnet_reply_tasklet(struct tasklet_struct *t) ret = sock_queue_err_skb(sk, ackskb); if (ret) - kfree_skb(ackskb); + dev_kfree_skb_irq(ackskb); local_irq_enable(); }; From 146c7c330507c0384bf29d567186632bfe975927 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Thu, 24 Aug 2023 15:43:08 +0200 Subject: [PATCH 06/24] mlxsw: i2c: Fix chunk size setting in output mailbox buffer The driver reads commands output from the output mailbox. If the size of the output mailbox is not a multiple of the transaction / block size, then the driver will not issue enough read transactions to read the entire output, which can result in driver initialization errors. Fix by determining the number of transactions using DIV_ROUND_UP(). Fixes: 3029a693beda ("mlxsw: i2c: Allow flexible setting of I2C transactions size") Signed-off-by: Vadim Pasternak Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 41298835a11e1..47af7ef7e4eee 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -444,7 +444,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, } else { /* No input mailbox is case of initialization query command. */ reg_size = MLXSW_I2C_MAX_DATA_SIZE; - num = reg_size / mlxsw_i2c->block_size; + num = DIV_ROUND_UP(reg_size, mlxsw_i2c->block_size); if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { dev_err(&client->dev, "Could not acquire lock"); From d7248f1cc835bd80e936dc5b2d94b149bdd0077d Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Thu, 24 Aug 2023 15:43:09 +0200 Subject: [PATCH 07/24] mlxsw: i2c: Limit single transaction buffer size Maximum size of buffer is obtained from underlying I2C adapter and in case adapter allows I2C transaction buffer size greater than 100 bytes, transaction will fail due to firmware limitation. As a result driver will fail initialization. Limit the maximum size of transaction buffer by 100 bytes to fit to firmware. Remove unnecessary calculation: max_t(u16, MLXSW_I2C_BLK_DEF, quirk_size). This condition can not happened. Fixes: 3029a693beda ("mlxsw: i2c: Allow flexible setting of I2C transactions size") Signed-off-by: Vadim Pasternak Reviewed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/i2c.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 47af7ef7e4eee..d23f293e285cb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -48,6 +48,7 @@ #define MLXSW_I2C_MBOX_SIZE_BITS 12 #define MLXSW_I2C_ADDR_BUF_SIZE 4 #define MLXSW_I2C_BLK_DEF 32 +#define MLXSW_I2C_BLK_MAX 100 #define MLXSW_I2C_RETRY 5 #define MLXSW_I2C_TIMEOUT_MSECS 5000 #define MLXSW_I2C_MAX_DATA_SIZE 256 @@ -653,7 +654,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client) return -EOPNOTSUPP; } - mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF, + mlxsw_i2c->block_size = min_t(u16, MLXSW_I2C_BLK_MAX, min_t(u16, quirks->max_read_len, quirks->max_write_len)); } else { From 3fc134a07438055fc93ce1bbacf2702ddd09500c Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Thu, 24 Aug 2023 15:43:10 +0200 Subject: [PATCH 08/24] mlxsw: core_hwmon: Adjust module label names based on MTCAP sensor counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Transceiver module temperature sensors are indexed after ASIC and platform sensors. The current label printing method does not take this into account and simply prints the index of the transceiver module sensor. On new systems that have platform sensors this results in incorrect (shifted) transceiver module labels being printed: $ sensors [...] front panel 002: +37.0°C (crit = +70.0°C, emerg = +75.0°C) front panel 003: +47.0°C (crit = +70.0°C, emerg = +75.0°C) [...] Fix by taking the sensor count into account. After the fix: $ sensors [...] front panel 001: +37.0°C (crit = +70.0°C, emerg = +75.0°C) front panel 002: +47.0°C (crit = +70.0°C, emerg = +75.0°C) [...] Fixes: a53779de6a0e ("mlxsw: core: Add QSFP module temperature label attribute to hwmon") Signed-off-by: Vadim Pasternak Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 70735068cf292..0fd290d776ffe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -405,7 +405,8 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); return sprintf(buf, "front panel %03u\n", - mlxsw_hwmon_attr->type_index); + mlxsw_hwmon_attr->type_index + 1 - + mlxsw_hwmon_attr->mlxsw_hwmon_dev->sensor_count); } static ssize_t From a9ac2e18779597f280d68a5b5f5bdd51a34080fa Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Thu, 24 Aug 2023 13:40:30 +0530 Subject: [PATCH 09/24] octeontx2-pf: Fix PFC TX scheduler free During PFC TX schedulers free, flag TXSCHQ_FREE_ALL was being set which caused free up all schedulers other than the PFC schedulers. This patch fixes that to free only the PFC Tx schedulers. Fixes: 99c969a83d82 ("octeontx2-pf: Add egress PFC support") Signed-off-by: Suman Ghosh Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230824081032.436432-2-sumang@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/nic/otx2_common.c | 1 + .../ethernet/marvell/octeontx2/nic/otx2_dcbnl.c | 15 ++++----------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 3e1c70c746227..b9712040a0bc2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -804,6 +804,7 @@ void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq) mutex_unlock(&pfvf->mbox.lock); } +EXPORT_SYMBOL(otx2_txschq_free_one); void otx2_txschq_stop(struct otx2_nic *pfvf) { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c index ccaf97bb1ce03..6492749dd7c89 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -125,19 +125,12 @@ int otx2_pfc_txschq_alloc(struct otx2_nic *pfvf) static int otx2_pfc_txschq_stop_one(struct otx2_nic *pfvf, u8 prio) { - struct nix_txsch_free_req *free_req; + int lvl; - mutex_lock(&pfvf->mbox.lock); /* free PFC TLx nodes */ - free_req = otx2_mbox_alloc_msg_nix_txsch_free(&pfvf->mbox); - if (!free_req) { - mutex_unlock(&pfvf->mbox.lock); - return -ENOMEM; - } - - free_req->flags = TXSCHQ_FREE_ALL; - otx2_sync_mbox_msg(&pfvf->mbox); - mutex_unlock(&pfvf->mbox.lock); + for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++) + otx2_txschq_free_one(pfvf, lvl, + pfvf->pfc_schq_list[lvl][prio]); pfvf->pfc_alloc_status[prio] = false; return 0; From 47bcc9c1cf6aa60156c7532983090e86d9d171b6 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Thu, 24 Aug 2023 13:40:31 +0530 Subject: [PATCH 10/24] octeontx2-af: CN10KB: fix PFC configuration Suppose user has enabled pfc with prio 0,1 on a PF netdev(eth0) dcb pfc set dev eth0 prio-pfc o:on 1:on later user enabled pfc priorities 2 and 3 on the VF interface(eth1) dcb pfc set dev eth1 prio-pfc 2:on 3:on Instead of enabling pfc on all priorities (0..3), the driver only enables on priorities 2,3. This patch corrects the issue by using the proper CSR address. Fixes: b9d0fedc6234 ("octeontx2-af: cn10kb: Add RPM_USX MAC support") Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230824081032.436432-3-sumang@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index b4fcb20c3f4fd..af21e2030cff2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -355,8 +355,8 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) { + u64 cfg, pfc_class_mask_cfg; rpm_t *rpm = rpmd; - u64 cfg; /* ALL pause frames received are completely ignored */ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); @@ -380,9 +380,11 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) rpm_write(rpm, 0, RPMX_CMR_CHAN_MSK_OR, ~0ULL); /* Disable all PFC classes */ - cfg = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL); + pfc_class_mask_cfg = is_dev_rpm2(rpm) ? RPM2_CMRX_PRT_CBFC_CTL : + RPMX_CMRX_PRT_CBFC_CTL; + cfg = rpm_read(rpm, lmac_id, pfc_class_mask_cfg); cfg = FIELD_SET(RPM_PFC_CLASS_MASK, 0, cfg); - rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, cfg); + rpm_write(rpm, lmac_id, pfc_class_mask_cfg, cfg); } int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat) @@ -605,8 +607,11 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p if (!is_lmac_valid(rpm, lmac_id)) return -ENODEV; + pfc_class_mask_cfg = is_dev_rpm2(rpm) ? RPM2_CMRX_PRT_CBFC_CTL : + RPMX_CMRX_PRT_CBFC_CTL; + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - class_en = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL); + class_en = rpm_read(rpm, lmac_id, pfc_class_mask_cfg); pfc_en |= FIELD_GET(RPM_PFC_CLASS_MASK, class_en); if (rx_pause) { @@ -635,10 +640,6 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - pfc_class_mask_cfg = is_dev_rpm2(rpm) ? RPM2_CMRX_PRT_CBFC_CTL : - RPMX_CMRX_PRT_CBFC_CTL; - rpm_write(rpm, lmac_id, pfc_class_mask_cfg, class_en); return 0; From 597d0ec0e4ca6a912affea4cc94df08959e9ec74 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Thu, 24 Aug 2023 13:40:32 +0530 Subject: [PATCH 11/24] cteonxt2-pf: Fix backpressure config for multiple PFC priorities to work simultaneously MAC (CGX or RPM) asserts backpressure at TL3 or TL2 node of the egress hierarchical scheduler tree depending on link level config done. If there are multiple PFC priorities enabled at a time and for all such flows to backoff, each priority will have to assert backpressure at different TL3/TL2 scheduler nodes and these flows will need to submit egress pkts to these nodes. Current PFC configuration has an issue where in only one backpressure scheduler node is being allocated which is resulting in only one PFC priority to work. This patch fixes this issue. Fixes: 99c969a83d82 ("octeontx2-pf: Add egress PFC support") Signed-off-by: Suman Ghosh Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230824081032.436432-4-sumang@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c index 6492749dd7c89..bfddbff7bcdfb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -70,7 +70,7 @@ static int otx2_pfc_txschq_alloc_one(struct otx2_nic *pfvf, u8 prio) * link config level. These rest of the scheduler can be * same as hw.txschq_list. */ - for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++) + for (lvl = 0; lvl <= pfvf->hw.txschq_link_cfg_lvl; lvl++) req->schq[lvl] = 1; rc = otx2_sync_mbox_msg(&pfvf->mbox); @@ -83,7 +83,7 @@ static int otx2_pfc_txschq_alloc_one(struct otx2_nic *pfvf, u8 prio) return PTR_ERR(rsp); /* Setup transmit scheduler list */ - for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++) { + for (lvl = 0; lvl <= pfvf->hw.txschq_link_cfg_lvl; lvl++) { if (!rsp->schq[lvl]) return -ENOSPC; @@ -128,7 +128,7 @@ static int otx2_pfc_txschq_stop_one(struct otx2_nic *pfvf, u8 prio) int lvl; /* free PFC TLx nodes */ - for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++) + for (lvl = 0; lvl <= pfvf->hw.txschq_link_cfg_lvl; lvl++) otx2_txschq_free_one(pfvf, lvl, pfvf->pfc_schq_list[lvl][prio]); From c4413a20fa6d7c4888009fb7dd391685f196cd36 Mon Sep 17 00:00:00 2001 From: Alex Austin Date: Thu, 24 Aug 2023 17:46:57 +0100 Subject: [PATCH 12/24] sfc: Check firmware supports Ethernet PTP filter Not all firmware variants support RSS filters. Do not fail all PTP functionality when raw ethernet PTP filters fail to insert. Fixes: e4616f64726b ("sfc: support PTP over Ethernet") Signed-off-by: Alex Austin Acked-by: Edward Cree Reviewed-by: Pieter Jansen van Vuuren Link: https://lore.kernel.org/r/20230824164657.42379-1-alex.austin@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ptp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 0c40571133cb9..00cf6de3bb2be 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1485,7 +1485,9 @@ static int efx_ptp_insert_multicast_filters(struct efx_nic *efx) goto fail; rc = efx_ptp_insert_eth_multicast_filter(efx); - if (rc < 0) + + /* Not all firmware variants support this filter */ + if (rc < 0 && rc != -EPROTONOSUPPORT) goto fail; } From b3d26c5702c7d6c45456326e56d2ccf3f103e60f Mon Sep 17 00:00:00 2001 From: Budimir Markovic Date: Thu, 24 Aug 2023 01:49:05 -0700 Subject: [PATCH 13/24] net/sched: sch_hfsc: Ensure inner classes have fsc curve HFSC assumes that inner classes have an fsc curve, but it is currently possible for classes without an fsc curve to become parents. This leads to bugs including a use-after-free. Don't allow non-root classes without HFSC_FSC to become parents. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Budimir Markovic Signed-off-by: Budimir Markovic Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20230824084905.422-1-markovicbudimir@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_hfsc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 70b0c5873d326..61d52594ff6d8 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1012,6 +1012,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (parent == NULL) return -ENOENT; } + if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) { + NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC"); + return -EINVAL; + } if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0) return -EINVAL; From 91202ce78fcd070982a115f0bf6f328af619aa00 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 24 Aug 2023 09:17:50 -0700 Subject: [PATCH 14/24] pds_core: protect devlink callbacks from fw_down state Don't access structs that have been cleared when in the fw_down state and the various structs have been cleaned and are waiting to recover. This caused a panic on rmmod when already in fw_down and devlink_param_unregister() tried to check the parameters. Fixes: 40ced8944536 ("pds_core: devlink params for enabling VIF support") Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230824161754.34264-2-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/devlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index 9c6b3653c1c7c..d9607033bbf21 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -10,6 +10,9 @@ pdsc_viftype *pdsc_dl_find_viftype_by_id(struct pdsc *pdsc, { int vt; + if (!pdsc->viftype_status) + return NULL; + for (vt = 0; vt < PDS_DEV_TYPE_MAX; vt++) { if (pdsc->viftype_status[vt].dl_id == dl_id) return &pdsc->viftype_status[vt]; From e48b894a1db7f6ce66bff0402ab21ff9f0e56034 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 24 Aug 2023 09:17:51 -0700 Subject: [PATCH 15/24] pds_core: no health reporter in VF Make sure the health reporter is set up before we use it in our devlink health updates, especially since the VF doesn't set up the health reporter. Fixes: 25b450c05a49 ("pds_core: add devlink health facilities") Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230824161754.34264-3-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index f2c79456d7452..383e3311a52c2 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -524,7 +524,8 @@ static void pdsc_fw_down(struct pdsc *pdsc) } /* Notify clients of fw_down */ - devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc); + if (pdsc->fw_reporter) + devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc); pdsc_notify(PDS_EVENT_RESET, &reset_event); pdsc_stop(pdsc); @@ -554,8 +555,9 @@ static void pdsc_fw_up(struct pdsc *pdsc) /* Notify clients of fw_up */ pdsc->fw_recoveries++; - devlink_health_reporter_state_update(pdsc->fw_reporter, - DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); + if (pdsc->fw_reporter) + devlink_health_reporter_state_update(pdsc->fw_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); pdsc_notify(PDS_EVENT_RESET, &reset_event); return; From 95e383226d6fcda6c217912f11edf8d74de9cc85 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 24 Aug 2023 09:17:52 -0700 Subject: [PATCH 16/24] pds_core: no reset command for VF The VF doesn't need to send a reset command, and in a PCI reset scenario it might not have a valid IO space to write to anyway. Fixes: 523847df1b37 ("pds_core: add devcmd device interfaces") Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230824161754.34264-4-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 383e3311a52c2..36f9b932b9e2a 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -464,7 +464,8 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) { int i; - pdsc_devcmd_reset(pdsc); + if (!pdsc->pdev->is_virtfn) + pdsc_devcmd_reset(pdsc); pdsc_qcq_free(pdsc, &pdsc->notifyqcq); pdsc_qcq_free(pdsc, &pdsc->adminqcq); From 969cfd4c8ca50c32901342cdd3d677c3ffe61371 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 24 Aug 2023 09:17:53 -0700 Subject: [PATCH 17/24] pds_core: check for work queue before use Add a check that the wq exists before queuing up work for a failed devcmd, as the PF is responsible for health and the VF doesn't have a wq. Fixes: c2dbb0904310 ("pds_core: health timer and workqueue") Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230824161754.34264-5-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index debe5216fe29e..524f422ee7ace 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -183,7 +183,7 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, err = pdsc_devcmd_wait(pdsc, max_seconds); memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); - if (err == -ENXIO || err == -ETIMEDOUT) + if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq) queue_work(pdsc->wq, &pdsc->health_work); return err; From 0ea064e74bc8f915aba3f2d0fb3418247a09b73d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 24 Aug 2023 09:17:54 -0700 Subject: [PATCH 18/24] pds_core: pass opcode to devcmd_wait Don't rely on the PCI memory for the devcmd opcode because we read a 0xff value if the PCI bus is broken, which can cause us to report a bogus dev_cmd opcode later. Fixes: 523847df1b37 ("pds_core: add devcmd device interfaces") Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230824161754.34264-6-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/dev.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 524f422ee7ace..f77cd9f5a2fda 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -121,7 +121,7 @@ static const char *pdsc_devcmd_str(int opcode) } } -static int pdsc_devcmd_wait(struct pdsc *pdsc, int max_seconds) +static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) { struct device *dev = pdsc->dev; unsigned long start_time; @@ -131,9 +131,6 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, int max_seconds) int done = 0; int err = 0; int status; - int opcode; - - opcode = ioread8(&pdsc->cmd_regs->cmd.opcode); start_time = jiffies; max_wait = start_time + (max_seconds * HZ); @@ -180,7 +177,7 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, memcpy_toio(&pdsc->cmd_regs->cmd, cmd, sizeof(*cmd)); pdsc_devcmd_dbell(pdsc); - err = pdsc_devcmd_wait(pdsc, max_seconds); + err = pdsc_devcmd_wait(pdsc, cmd->opcode, max_seconds); memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq) From c2f8fd7949603efb03908e05abbf7726748c8de3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 24 Aug 2023 09:50:59 -0700 Subject: [PATCH 19/24] netrom: Deny concurrent connect(). syzkaller reported null-ptr-deref [0] related to AF_NETROM. This is another self-accept issue from the strace log. [1] syz-executor creates an AF_NETROM socket and calls connect(), which is blocked at that time. Then, sk->sk_state is TCP_SYN_SENT and sock->state is SS_CONNECTING. [pid 5059] socket(AF_NETROM, SOCK_SEQPACKET, 0) = 4 [pid 5059] connect(4, {sa_family=AF_NETROM, sa_data="..." Another thread calls connect() concurrently, which finally fails with -EINVAL. However, the problem here is the socket state is reset even while the first connect() is blocked. [pid 5060] connect(4, NULL, 0 [pid 5060] <... connect resumed>) = -1 EINVAL (Invalid argument) As sk->state is TCP_CLOSE and sock->state is SS_UNCONNECTED, the following listen() succeeds. Then, the first connect() looks up itself as a listener and puts skb into the queue with skb->sk itself. As a result, the next accept() gets another FD of itself as 3, and the first connect() finishes. [pid 5060] listen(4, 0 [pid 5060] <... listen resumed>) = 0 [pid 5060] accept(4, NULL, NULL [pid 5060] <... accept resumed>) = 3 [pid 5059] <... connect resumed>) = 0 Then, accept4() is called but blocked, which causes the general protection fault later. [pid 5059] accept4(4, NULL, 0x20000400, SOCK_NONBLOCK After that, another self-accept occurs by accept() and writev(). [pid 5060] accept(4, NULL, NULL [pid 5061] writev(3, [{iov_base=...}] [pid 5061] <... writev resumed>) = 99 [pid 5060] <... accept resumed>) = 6 Finally, the leader thread close()s all FDs. Since the three FDs reference the same socket, nr_release() does the cleanup for it three times, and the remaining accept4() causes the following fault. [pid 5058] close(3) = 0 [pid 5058] close(4) = 0 [pid 5058] close(5) = -1 EBADF (Bad file descriptor) [pid 5058] close(6) = 0 [pid 5058] <... exit_group resumed>) = ? [ 83.456055][ T5059] general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN To avoid the issue, we need to return an error for connect() if another connect() is in progress, as done in __inet_stream_connect(). [0]: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] CPU: 0 PID: 5059 Comm: syz-executor.0 Not tainted 6.5.0-rc5-syzkaller-00194-gace0ab3a4b54 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 RIP: 0010:__lock_acquire+0x109/0x5de0 kernel/locking/lockdep.c:5012 Code: 45 85 c9 0f 84 cc 0e 00 00 44 8b 05 11 6e 23 0b 45 85 c0 0f 84 be 0d 00 00 48 ba 00 00 00 00 00 fc ff df 4c 89 d1 48 c1 e9 03 <80> 3c 11 00 0f 85 e8 40 00 00 49 81 3a a0 69 48 90 0f 84 96 0d 00 RSP: 0018:ffffc90003d6f9e0 EFLAGS: 00010006 RAX: ffff8880244c8000 RBX: 1ffff920007adf6c RCX: 0000000000000003 RDX: dffffc0000000000 RSI: 0000000000000000 RDI: 0000000000000018 RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000018 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f51d519a6c0(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f51d5158d58 CR3: 000000002943f000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: lock_acquire kernel/locking/lockdep.c:5761 [inline] lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x3a/0x50 kernel/locking/spinlock.c:162 prepare_to_wait+0x47/0x380 kernel/sched/wait.c:269 nr_accept+0x20d/0x650 net/netrom/af_netrom.c:798 do_accept+0x3a6/0x570 net/socket.c:1872 __sys_accept4_file net/socket.c:1913 [inline] __sys_accept4+0x99/0x120 net/socket.c:1943 __do_sys_accept4 net/socket.c:1954 [inline] __se_sys_accept4 net/socket.c:1951 [inline] __x64_sys_accept4+0x96/0x100 net/socket.c:1951 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f51d447cae9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f51d519a0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000120 RAX: ffffffffffffffda RBX: 00007f51d459bf80 RCX: 00007f51d447cae9 RDX: 0000000020000400 RSI: 0000000000000000 RDI: 0000000000000004 RBP: 00007f51d44c847a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000800 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f51d459bf80 R15: 00007ffc25c34e48 Link: https://syzkaller.appspot.com/text?tag=CrashLog&x=152cdb63a80000 [1] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+666c97e4686410e79649@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=666c97e4686410e79649 Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index eb8ccbd58df74..96e91ab71573c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -660,6 +660,11 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, goto out_release; } + if (sock->state == SS_CONNECTING) { + err = -EALREADY; + goto out_release; + } + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; From bb5ed01cd2428cd25b1c88a3a9cba87055eb289f Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Thu, 24 Aug 2023 13:46:19 -0700 Subject: [PATCH 20/24] igb: set max size RX buffer when store bad packet is enabled Increase the RX buffer size to 3K when the SBP bit is on. The size of the RX buffer determines the number of pages allocated which may not be sufficient for receive frames larger than the set MTU size. Cc: stable@vger.kernel.org Fixes: 89eaefb61dc9 ("igb: Support RX-ALL feature flag.") Reported-by: Manfred Rudigier Signed-off-by: Radoslaw Tyl Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 9a2561409b06f..08e3df37089fe 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4814,6 +4814,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, static void igb_set_rx_buffer_len(struct igb_adapter *adapter, struct igb_ring *rx_ring) { +#if (PAGE_SIZE < 8192) + struct e1000_hw *hw = &adapter->hw; +#endif + /* set build_skb and buffer size flags */ clear_ring_build_skb_enabled(rx_ring); clear_ring_uses_large_buffer(rx_ring); @@ -4824,10 +4828,9 @@ static void igb_set_rx_buffer_len(struct igb_adapter *adapter, set_ring_build_skb_enabled(rx_ring); #if (PAGE_SIZE < 8192) - if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB) - return; - - set_ring_uses_large_buffer(rx_ring); + if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB || + rd32(E1000_RCTL) & E1000_RCTL_SBP) + set_ring_uses_large_buffer(rx_ring); #endif } From 72dd7e427e16fca901daaa399e4b0893c3bb85c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Fri, 25 Aug 2023 10:20:27 +0200 Subject: [PATCH 21/24] dt-bindings: net: dsa: marvell: fix wrong model in compatibility list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix wrong switch name in compatibility list. 88E6163 switch does not exist and is in fact 88E6361 Fixes: 9229a9483d80 ("dt-bindings: net: dsa: marvell: add MV88E6361 switch to compatibility list") Signed-off-by: Alexis Lothoré Reviewed-by: Andrew Lunn Acked-by: Conor Dooley Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/marvell.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt index 33726134f5c96..6ec0c181b6db2 100644 --- a/Documentation/devicetree/bindings/net/dsa/marvell.txt +++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt @@ -20,7 +20,7 @@ which is at a different MDIO base address in different switch families. 6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321, 6341, 6350, 6351, 6352 - "marvell,mv88e6190" : Switch has base address 0x00. Use with models: - 6163, 6190, 6190X, 6191, 6290, 6390, 6390X + 6190, 6190X, 6191, 6290, 6361, 6390, 6390X - "marvell,mv88e6250" : Switch has base address 0x08 or 0x18. Use with model: 6220, 6250 From 977ad86c2a1bcaf58f01ab98df5cc145083c489c Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 25 Aug 2023 15:32:41 +0200 Subject: [PATCH 22/24] dccp: Fix out of bounds access in DCCP error handler There was a previous attempt to fix an out-of-bounds access in the DCCP error handlers, but that fix assumed that the error handlers only want to access the first 8 bytes of the DCCP header. Actually, they also look at the DCCP sequence number, which is stored beyond 8 bytes, so an explicit pskb_may_pull() is required. Fixes: 6706a97fec96 ("dccp: fix out of bound access in dccp_v4_err()") Fixes: 1aa9d1a0e7ee ("ipv6: dccp: fix out of bound access in dccp_v6_err()") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 13 +++++++++---- net/dccp/ipv6.c | 15 ++++++++++----- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a545ad71201c8..a5361fb7a415b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -255,12 +255,17 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - /* Only need dccph_dport & dccph_sport which are the first - * 4 bytes in dccp header. + /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x, + * which is in byte 7 of the dccp header. * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. + * + * Later on, we want to access the sequence number fields, which are + * beyond 8 bytes, so we have to pskb_may_pull() ourselves. */ - BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); - BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); + if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) + return -EINVAL; + iph = (struct iphdr *)skb->data; dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet_lookup_established(net, &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d29d1163203d9..25816e790527d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -74,7 +74,7 @@ static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; + const struct ipv6hdr *hdr; const struct dccp_hdr *dh; struct dccp_sock *dp; struct ipv6_pinfo *np; @@ -83,12 +83,17 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); - /* Only need dccph_dport & dccph_sport which are the first - * 4 bytes in dccp header. + /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x, + * which is in byte 7 of the dccp header. * Our caller (icmpv6_notify()) already pulled 8 bytes for us. + * + * Later on, we want to access the sequence number fields, which are + * beyond 8 bytes, so we have to pskb_may_pull() ourselves. */ - BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); - BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); + if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) + return -EINVAL; + hdr = (const struct ipv6hdr *)skb->data; dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet6_lookup_established(net, &dccp_hashinfo, From ec1b90886f3cd7e90d4c0381dffb418e2d3a1473 Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Fri, 25 Aug 2023 22:04:41 +0300 Subject: [PATCH 23/24] ethernet: tg3: remove unreachable code 'tp->irq_max' value is either 1 [L16336] or 5 [L16354], as indicated in tg3_get_invariants(). Therefore, 'i' can't exceed 4 in tg3_init_one() that makes (i <= 4) always true. Moreover, 'intmbx' value set at the last iteration is not used later in it's scope. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 78f90dcf184b ("tg3: Move napi_add calls below tg3_get_invariants") Signed-off-by: Mikhail Kobuk Reviewed-by: Alexey Khoroshilov Reviewed-by: Simon Horman Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index cb2810f175ccd..5408ed0517726 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -17795,10 +17795,7 @@ static int tg3_init_one(struct pci_dev *pdev, tnapi->tx_pending = TG3_DEF_TX_RING_PENDING; tnapi->int_mbox = intmbx; - if (i <= 4) - intmbx += 0x8; - else - intmbx += 0x4; + intmbx += 0x8; tnapi->consmbox = rcvmbx; tnapi->prodmbox = sndmbx; From 90ca51e8c654699b672ba61aeaa418dfb3252e5e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 25 Aug 2023 21:44:01 +0200 Subject: [PATCH 24/24] r8169: fix ASPM-related issues on a number of systems with NIC version from RTL8168h This effectively reverts 4b5f82f6aaef. On a number of systems ASPM L1 causes tx timeouts with RTL8168h, see referenced bug report. Fixes: 4b5f82f6aaef ("r8169: enable ASPM L1/L1.1 from RTL8168h") Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217814 Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5eb50b265c0bd..6351a2dc13bce 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5239,13 +5239,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Disable ASPM L1 as that cause random device stop working * problems as well as full system hangs for some PCIe devices users. - * Chips from RTL8168h partially have issues with L1.2, but seem - * to work fine with L1 and L1.1. */ if (rtl_aspm_is_safe(tp)) rc = 0; - else if (tp->mac_version >= RTL_GIGA_MAC_VER_46) - rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); else rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1); tp->aspm_manageable = !rc;