Skip to content

Commit 55a515b

Browse files
vladimirolteankuba-moo
authored andcommitted
net: dsa: felix: drop oversized frames with tc-taprio instead of hanging the port
Currently, sending a packet into a time gate too small for it (or always closed) causes the queue system to hold the frame forever. Even worse, this frame isn't subject to aging either, because for that to happen, it needs to be scheduled for transmission in the first place. But the frame will consume buffer memory and frame references while it is forever held in the queue system. Before commit a4ae997 ("net: mscc: ocelot: initialize watermarks to sane defaults"), this behavior was somewhat subtle, as the switch had a more intricately tuned default watermark configuration out of reset, which did not allow any single port and tc to consume the entire switch buffer space. Nonetheless, the held frames are still there, and they reduce the total backplane capacity of the switch. However, after the aforementioned commit, the behavior can be very clearly seen, since we deliberately allow each {port, tc} to consume the entire shared buffer of the switch minus the reservations (and we disable all reservations by default). That is to say, we allow a permanently closed tc-taprio gate to hang the entire switch. A careful inspection of the documentation shows that the QSYS:Q_MAX_SDU per-port-tc registers serve 2 purposes: one is for guard band calculation (when zero, this falls back to QSYS:PORT_MAX_SDU), and the other is to enable oversized frame dropping (when non-zero). Currently the QSYS:Q_MAX_SDU registers are all zero, so oversized frame dropping is disabled. The goal of the change is to enable it seamlessly. For that, we need to hook into the MTU change, tc-taprio change, and port link speed change procedures, since we depend on these variables. Frames are not dropped on egress due to a queue system oversize condition, instead that egress port is simply excluded from the mask of valid destination ports for the packet. If there are no destination ports at all, the ingress counter that increments is the generic "drop_tail" in ethtool -S. The issue exists in various forms since the tc-taprio offload was introduced. Fixes: de143c0 ("net: dsa: felix: Configure Time-Aware Scheduler via taprio offload") Reported-by: Richie Pearn <richard.pearn@nxp.com> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent d68a373 commit 55a515b

File tree

3 files changed

+211
-0
lines changed

3 files changed

+211
-0
lines changed

drivers/net/dsa/ocelot/felix.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1553,9 +1553,18 @@ static void felix_txtstamp(struct dsa_switch *ds, int port,
15531553
static int felix_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
15541554
{
15551555
struct ocelot *ocelot = ds->priv;
1556+
struct ocelot_port *ocelot_port = ocelot->ports[port];
1557+
struct felix *felix = ocelot_to_felix(ocelot);
15561558

15571559
ocelot_port_set_maxlen(ocelot, port, new_mtu);
15581560

1561+
mutex_lock(&ocelot->tas_lock);
1562+
1563+
if (ocelot_port->taprio && felix->info->tas_guard_bands_update)
1564+
felix->info->tas_guard_bands_update(ocelot, port);
1565+
1566+
mutex_unlock(&ocelot->tas_lock);
1567+
15591568
return 0;
15601569
}
15611570

drivers/net/dsa/ocelot/felix.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ struct felix_info {
5353
struct phylink_link_state *state);
5454
int (*port_setup_tc)(struct dsa_switch *ds, int port,
5555
enum tc_setup_type type, void *type_data);
56+
void (*tas_guard_bands_update)(struct ocelot *ocelot, int port);
5657
void (*port_sched_speed_set)(struct ocelot *ocelot, int port,
5758
u32 speed);
5859
struct regmap *(*init_regmap)(struct ocelot *ocelot,

drivers/net/dsa/ocelot/felix_vsc9959.c

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,9 +1127,199 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot)
11271127
mdiobus_free(felix->imdio);
11281128
}
11291129

1130+
/* Extract shortest continuous gate open intervals in ns for each traffic class
1131+
* of a cyclic tc-taprio schedule. If a gate is always open, the duration is
1132+
* considered U64_MAX. If the gate is always closed, it is considered 0.
1133+
*/
1134+
static void vsc9959_tas_min_gate_lengths(struct tc_taprio_qopt_offload *taprio,
1135+
u64 min_gate_len[OCELOT_NUM_TC])
1136+
{
1137+
struct tc_taprio_sched_entry *entry;
1138+
u64 gate_len[OCELOT_NUM_TC];
1139+
int tc, i, n;
1140+
1141+
/* Initialize arrays */
1142+
for (tc = 0; tc < OCELOT_NUM_TC; tc++) {
1143+
min_gate_len[tc] = U64_MAX;
1144+
gate_len[tc] = 0;
1145+
}
1146+
1147+
/* If we don't have taprio, consider all gates as permanently open */
1148+
if (!taprio)
1149+
return;
1150+
1151+
n = taprio->num_entries;
1152+
1153+
/* Walk through the gate list twice to determine the length
1154+
* of consecutively open gates for a traffic class, including
1155+
* open gates that wrap around. We are just interested in the
1156+
* minimum window size, and this doesn't change what the
1157+
* minimum is (if the gate never closes, min_gate_len will
1158+
* remain U64_MAX).
1159+
*/
1160+
for (i = 0; i < 2 * n; i++) {
1161+
entry = &taprio->entries[i % n];
1162+
1163+
for (tc = 0; tc < OCELOT_NUM_TC; tc++) {
1164+
if (entry->gate_mask & BIT(tc)) {
1165+
gate_len[tc] += entry->interval;
1166+
} else {
1167+
/* Gate closes now, record a potential new
1168+
* minimum and reinitialize length
1169+
*/
1170+
if (min_gate_len[tc] > gate_len[tc])
1171+
min_gate_len[tc] = gate_len[tc];
1172+
gate_len[tc] = 0;
1173+
}
1174+
}
1175+
}
1176+
}
1177+
1178+
/* Update QSYS_PORT_MAX_SDU to make sure the static guard bands added by the
1179+
* switch (see the ALWAYS_GUARD_BAND_SCH_Q comment) are correct at all MTU
1180+
* values (the default value is 1518). Also, for traffic class windows smaller
1181+
* than one MTU sized frame, update QSYS_QMAXSDU_CFG to enable oversized frame
1182+
* dropping, such that these won't hang the port, as they will never be sent.
1183+
*/
1184+
static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
1185+
{
1186+
struct ocelot_port *ocelot_port = ocelot->ports[port];
1187+
u64 min_gate_len[OCELOT_NUM_TC];
1188+
int speed, picos_per_byte;
1189+
u64 needed_bit_time_ps;
1190+
u32 val, maxlen;
1191+
u8 tas_speed;
1192+
int tc;
1193+
1194+
lockdep_assert_held(&ocelot->tas_lock);
1195+
1196+
val = ocelot_read_rix(ocelot, QSYS_TAG_CONFIG, port);
1197+
tas_speed = QSYS_TAG_CONFIG_LINK_SPEED_X(val);
1198+
1199+
switch (tas_speed) {
1200+
case OCELOT_SPEED_10:
1201+
speed = SPEED_10;
1202+
break;
1203+
case OCELOT_SPEED_100:
1204+
speed = SPEED_100;
1205+
break;
1206+
case OCELOT_SPEED_1000:
1207+
speed = SPEED_1000;
1208+
break;
1209+
case OCELOT_SPEED_2500:
1210+
speed = SPEED_2500;
1211+
break;
1212+
default:
1213+
return;
1214+
}
1215+
1216+
picos_per_byte = (USEC_PER_SEC * 8) / speed;
1217+
1218+
val = ocelot_port_readl(ocelot_port, DEV_MAC_MAXLEN_CFG);
1219+
/* MAXLEN_CFG accounts automatically for VLAN. We need to include it
1220+
* manually in the bit time calculation, plus the preamble and SFD.
1221+
*/
1222+
maxlen = val + 2 * VLAN_HLEN;
1223+
/* Consider the standard Ethernet overhead of 8 octets preamble+SFD,
1224+
* 4 octets FCS, 12 octets IFG.
1225+
*/
1226+
needed_bit_time_ps = (maxlen + 24) * picos_per_byte;
1227+
1228+
dev_dbg(ocelot->dev,
1229+
"port %d: max frame size %d needs %llu ps at speed %d\n",
1230+
port, maxlen, needed_bit_time_ps, speed);
1231+
1232+
vsc9959_tas_min_gate_lengths(ocelot_port->taprio, min_gate_len);
1233+
1234+
for (tc = 0; tc < OCELOT_NUM_TC; tc++) {
1235+
u32 max_sdu;
1236+
1237+
if (min_gate_len[tc] == U64_MAX /* Gate always open */ ||
1238+
min_gate_len[tc] * 1000 > needed_bit_time_ps) {
1239+
/* Setting QMAXSDU_CFG to 0 disables oversized frame
1240+
* dropping.
1241+
*/
1242+
max_sdu = 0;
1243+
dev_dbg(ocelot->dev,
1244+
"port %d tc %d min gate len %llu"
1245+
", sending all frames\n",
1246+
port, tc, min_gate_len[tc]);
1247+
} else {
1248+
/* If traffic class doesn't support a full MTU sized
1249+
* frame, make sure to enable oversize frame dropping
1250+
* for frames larger than the smallest that would fit.
1251+
*/
1252+
max_sdu = div_u64(min_gate_len[tc] * 1000,
1253+
picos_per_byte);
1254+
/* A TC gate may be completely closed, which is a
1255+
* special case where all packets are oversized.
1256+
* Any limit smaller than 64 octets accomplishes this
1257+
*/
1258+
if (!max_sdu)
1259+
max_sdu = 1;
1260+
/* Take L1 overhead into account, but just don't allow
1261+
* max_sdu to go negative or to 0. Here we use 20
1262+
* because QSYS_MAXSDU_CFG_* already counts the 4 FCS
1263+
* octets as part of packet size.
1264+
*/
1265+
if (max_sdu > 20)
1266+
max_sdu -= 20;
1267+
dev_info(ocelot->dev,
1268+
"port %d tc %d min gate length %llu"
1269+
" ns not enough for max frame size %d at %d"
1270+
" Mbps, dropping frames over %d"
1271+
" octets including FCS\n",
1272+
port, tc, min_gate_len[tc], maxlen, speed,
1273+
max_sdu);
1274+
}
1275+
1276+
/* ocelot_write_rix is a macro that concatenates
1277+
* QSYS_MAXSDU_CFG_* with _RSZ, so we need to spell out
1278+
* the writes to each traffic class
1279+
*/
1280+
switch (tc) {
1281+
case 0:
1282+
ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_0,
1283+
port);
1284+
break;
1285+
case 1:
1286+
ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_1,
1287+
port);
1288+
break;
1289+
case 2:
1290+
ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_2,
1291+
port);
1292+
break;
1293+
case 3:
1294+
ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_3,
1295+
port);
1296+
break;
1297+
case 4:
1298+
ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_4,
1299+
port);
1300+
break;
1301+
case 5:
1302+
ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_5,
1303+
port);
1304+
break;
1305+
case 6:
1306+
ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_6,
1307+
port);
1308+
break;
1309+
case 7:
1310+
ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_7,
1311+
port);
1312+
break;
1313+
}
1314+
}
1315+
1316+
ocelot_write_rix(ocelot, maxlen, QSYS_PORT_MAX_SDU, port);
1317+
}
1318+
11301319
static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
11311320
u32 speed)
11321321
{
1322+
struct ocelot_port *ocelot_port = ocelot->ports[port];
11331323
u8 tas_speed;
11341324

11351325
switch (speed) {
@@ -1154,6 +1344,13 @@ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
11541344
QSYS_TAG_CONFIG_LINK_SPEED(tas_speed),
11551345
QSYS_TAG_CONFIG_LINK_SPEED_M,
11561346
QSYS_TAG_CONFIG, port);
1347+
1348+
mutex_lock(&ocelot->tas_lock);
1349+
1350+
if (ocelot_port->taprio)
1351+
vsc9959_tas_guard_bands_update(ocelot, port);
1352+
1353+
mutex_unlock(&ocelot->tas_lock);
11571354
}
11581355

11591356
static void vsc9959_new_base_time(struct ocelot *ocelot, ktime_t base_time,
@@ -1210,6 +1407,8 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
12101407
taprio_offload_free(ocelot_port->taprio);
12111408
ocelot_port->taprio = NULL;
12121409

1410+
vsc9959_tas_guard_bands_update(ocelot, port);
1411+
12131412
mutex_unlock(&ocelot->tas_lock);
12141413
return 0;
12151414
}
@@ -1284,6 +1483,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
12841483
goto err;
12851484

12861485
ocelot_port->taprio = taprio_offload_get(taprio);
1486+
vsc9959_tas_guard_bands_update(ocelot, port);
12871487

12881488
err:
12891489
mutex_unlock(&ocelot->tas_lock);
@@ -2307,6 +2507,7 @@ static const struct felix_info felix_info_vsc9959 = {
23072507
.port_modes = vsc9959_port_modes,
23082508
.port_setup_tc = vsc9959_port_setup_tc,
23092509
.port_sched_speed_set = vsc9959_sched_speed_set,
2510+
.tas_guard_bands_update = vsc9959_tas_guard_bands_update,
23102511
.init_regmap = ocelot_regmap_init,
23112512
};
23122513

0 commit comments

Comments
 (0)