 
 #include <linux/netdevice.h>
 #include "lag.h"
+#include "lag_mp.h"
 #include "mlx5_core.h"
 #include "eswitch.h"
 
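+/* Multipath offload needs both PF devices present and passing the
+ * e-switch prerequisites checked by mlx5_esw_multipath_prereq().
+ */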
+static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
+{
+	if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+		return false;
+
+	return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+}
+
 static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
 {
 	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
@@ -21,3 +30,275 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
 
 	return res;
 }
+
+/**
+ * mlx5_lag_set_port_affinity - set LAG port affinity
+ *
+ * @ldev: lag device
+ * @port: 0 - set normal affinity
+ *        1 - set affinity to port 1
+ *        2 - set affinity to port 2
+ */
+static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
+{
+	struct lag_tracker tracker;
+
+	if (!__mlx5_lag_is_multipath(ldev))
+		return;
+
+	switch (port) {
+	case 0:
+		tracker.netdev_state[0].tx_enabled = true;
+		tracker.netdev_state[1].tx_enabled = true;
+		tracker.netdev_state[0].link_up = true;
+		tracker.netdev_state[1].link_up = true;
+		break;
+	case 1:
+		tracker.netdev_state[0].tx_enabled = true;
+		tracker.netdev_state[0].link_up = true;
+		tracker.netdev_state[1].tx_enabled = false;
+		tracker.netdev_state[1].link_up = false;
+		break;
+	case 2:
+		tracker.netdev_state[0].tx_enabled = false;
+		tracker.netdev_state[0].link_up = false;
+		tracker.netdev_state[1].tx_enabled = true;
+		tracker.netdev_state[1].link_up = true;
+		break;
+	default:
+		mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d",
+			       port);
+		return;
+	}
+
+	mlx5_modify_lag(ldev, &tracker);
+}
+
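+/* Flush callback passed to register_fib_notifier(): drains any FIB
+ * event work still queued on the LAG workqueue.
+ */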
+static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
+{
+	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
+	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
+
+	flush_workqueue(ldev->wq);
+}
+
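+/* Carries a FIB event from atomic notifier context to the workqueue. */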
+struct mlx5_fib_event_work {
+	struct work_struct work;
+	struct mlx5_lag *ldev;
+	unsigned long event;
+	union {
+		struct fib_entry_notifier_info fen_info;
+		struct fib_nh_notifier_info fnh_info;
+	};
+};
+
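+/* Track the offloaded multipath route and steer port affinity on
+ * route add/replace/append/del events.
+ */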
+static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+				     unsigned long event,
+				     struct fib_info *fi)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+
+	/* Handle delete event */
+	if (event == FIB_EVENT_ENTRY_DEL) {
+		/* stop track */
+		if (mp->mfi == fi)
+			mp->mfi = NULL;
+		return;
+	}
+
+	/* Handle add/replace event */
+	if (fi->fib_nhs == 1) {
+		if (__mlx5_lag_is_active(ldev)) {
+			struct net_device *nh_dev = fi->fib_nh[0].nh_dev;
+			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
+
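+			/* netdev idx 0/1 maps to port 1/2; -1 (not
+			 * found) maps to 0, i.e. normal affinity
+			 */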
+			mlx5_lag_set_port_affinity(ldev, ++i);
+		}
+		return;
+	}
+
+	if (fi->fib_nhs != 2)
+		return;
+
+	/* Verify next hops are ports of the same HCA */
+	if (!(fi->fib_nh[0].nh_dev == ldev->pf[0].netdev &&
+	      fi->fib_nh[1].nh_dev == ldev->pf[1].netdev) &&
+	    !(fi->fib_nh[0].nh_dev == ldev->pf[1].netdev &&
+	      fi->fib_nh[1].nh_dev == ldev->pf[0].netdev)) {
+		mlx5_core_warn(ldev->pf[0].dev, "Multipath offload requires two ports of the same HCA\n");
+		return;
+	}
+
+	/* First time we see multipath route */
+	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
+		struct lag_tracker tracker;
+
+		tracker = ldev->tracker;
+		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+	}
+
+	mlx5_lag_set_port_affinity(ldev, 0);
+	mp->mfi = fi;
+}
+
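+/* A next hop of the tracked route changed: fail over to the peer port
+ * on NH_DEL, restore normal affinity once both next hops are present.
+ */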
+static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
+				       unsigned long event,
+				       struct fib_nh *fib_nh,
+				       struct fib_info *fi)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+
+	/* Check the nh event is related to the route */
+	if (!mp->mfi || mp->mfi != fi)
+		return;
+
+	/* nh added/removed */
+	if (event == FIB_EVENT_NH_DEL) {
+		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->nh_dev);
+
+		if (i >= 0) {
+			i = (i + 1) % 2 + 1;	/* peer port */
+			mlx5_lag_set_port_affinity(ldev, i);
+		}
+	} else if (event == FIB_EVENT_NH_ADD &&
+		   fi->fib_nhs == 2) {
+		mlx5_lag_set_port_affinity(ldev, 0);
+	}
+}
+
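+/* Work handler: processes one deferred FIB event under RTNL. */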
+static void mlx5_lag_fib_update(struct work_struct *work)
+{
+	struct mlx5_fib_event_work *fib_work =
+		container_of(work, struct mlx5_fib_event_work, work);
+	struct mlx5_lag *ldev = fib_work->ldev;
+	struct fib_nh *fib_nh;
+
+	/* Protect internal structures from changes */
+	rtnl_lock();
+	switch (fib_work->event) {
+	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+	case FIB_EVENT_ENTRY_APPEND: /* fall through */
+	case FIB_EVENT_ENTRY_ADD: /* fall through */
+	case FIB_EVENT_ENTRY_DEL:
+		mlx5_lag_fib_route_event(ldev, fib_work->event,
+					 fib_work->fen_info.fi);
+		fib_info_put(fib_work->fen_info.fi);
+		break;
+	case FIB_EVENT_NH_ADD: /* fall through */
+	case FIB_EVENT_NH_DEL:
+		fib_nh = fib_work->fnh_info.fib_nh;
+		mlx5_lag_fib_nexthop_event(ldev,
+					   fib_work->event,
+					   fib_work->fnh_info.fib_nh,
+					   fib_nh->nh_parent);
+		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
+		break;
+	}
+
+	rtnl_unlock();
+	kfree(fib_work);
+}
+
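+/* Allocate a work item; callable from atomic (notifier) context. */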
+static struct mlx5_fib_event_work *
+mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
+{
+	struct mlx5_fib_event_work *fib_work;
+
+	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
+	if (WARN_ON(!fib_work))
+		return NULL;
+
+	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
+	fib_work->ldev = ldev;
+	fib_work->event = event;
+
+	return fib_work;
+}
+
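+/* FIB notifier callback: runs in atomic context, so only validate the
+ * event here and defer the actual handling to the LAG workqueue.
+ */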
+static int mlx5_lag_fib_event(struct notifier_block *nb,
+			      unsigned long event,
+			      void *ptr)
+{
+	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
+	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
+	struct fib_notifier_info *info = ptr;
+	struct mlx5_fib_event_work *fib_work;
+	struct fib_entry_notifier_info *fen_info;
+	struct fib_nh_notifier_info *fnh_info;
+	struct fib_info *fi;
+
+	if (info->family != AF_INET)
+		return NOTIFY_DONE;
+
+	if (!mlx5_lag_multipath_check_prereq(ldev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+	case FIB_EVENT_ENTRY_APPEND: /* fall through */
+	case FIB_EVENT_ENTRY_ADD: /* fall through */
+	case FIB_EVENT_ENTRY_DEL:
+		fen_info = container_of(info, struct fib_entry_notifier_info,
+					info);
+		fi = fen_info->fi;
+		if (fi->fib_dev != ldev->pf[0].netdev &&
+		    fi->fib_dev != ldev->pf[1].netdev) {
+			return NOTIFY_DONE;
+		}
+		fib_work = mlx5_lag_init_fib_work(ldev, event);
+		if (!fib_work)
+			return NOTIFY_DONE;
+		fib_work->fen_info = *fen_info;
+		/* Take reference on fib_info to prevent it from being
+		 * freed while work is queued. Release it afterwards.
+		 */
+		fib_info_hold(fib_work->fen_info.fi);
+		break;
+	case FIB_EVENT_NH_ADD: /* fall through */
+	case FIB_EVENT_NH_DEL:
+		fnh_info = container_of(info, struct fib_nh_notifier_info,
+					info);
+		fib_work = mlx5_lag_init_fib_work(ldev, event);
+		if (!fib_work)
+			return NOTIFY_DONE;
+		fib_work->fnh_info = *fnh_info;
+		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	queue_work(ldev->wq, &fib_work->work);
+
+	return NOTIFY_DONE;
+}
+
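+/* Register the FIB notifier; a no-op if it is already registered. */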
+int mlx5_lag_mp_init(struct mlx5_lag *ldev)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+	int err;
+
+	if (mp->fib_nb.notifier_call)
+		return 0;
+
+	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
+	err = register_fib_notifier(&mp->fib_nb,
+				    mlx5_lag_fib_event_flush);
+	if (err)
+		mp->fib_nb.notifier_call = NULL;
+
+	return err;
+}
+
+void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+
+	if (!mp->fib_nb.notifier_call)
+		return;
+
+	unregister_fib_notifier(&mp->fib_nb);
+	mp->fib_nb.notifier_call = NULL;
+}