1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include "lib/devcom.h"
39 #include "mlx5_core.h"
40 #include "eswitch.h"
41 #include "esw/acl/ofld.h"
42 #include "lag.h"
43 #include "mp.h"
44 #include "mpesw.h"
45
46 enum {
47 MLX5_LAG_EGRESS_PORT_1 = 1,
48 MLX5_LAG_EGRESS_PORT_2,
49 };
50
51 /* General purpose, use for short periods of time.
52 * Beware of lock dependencies (preferably, no locks should be acquired
53 * under it).
54 */
55 static DEFINE_SPINLOCK(lag_lock);
56
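/* Resolve the FW port selection mode: the hash-based flag selects the
 * port-select flow table, MPESW uses its own mode, and the default is
 * queue affinity.
 */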
57 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
58 {
59 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
60 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
61
62 if (mode == MLX5_LAG_MODE_MPESW)
63 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
64
65 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
66 }
67
68 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
69 unsigned long flags)
70 {
71 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
72 &flags);
73 int port_sel_mode = get_port_sel_mode(mode, flags);
74 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
75 void *lag_ctx;
76
77 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
78 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
79 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
80 if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) {
81 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
82 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
83 }
84 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
85
86 return mlx5_cmd_exec_in(dev, create_lag, in);
87 }
88
89 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
90 u8 *ports)
91 {
92 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
93 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
94
95 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
96 MLX5_SET(modify_lag_in, in, field_select, 0x1);
97
98 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
99 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
100
101 return mlx5_cmd_exec_in(dev, modify_lag, in);
102 }
103
104 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
105 {
106 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
107
108 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
109
110 return mlx5_cmd_exec_in(dev, create_vport_lag, in);
111 }
112 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
113
114 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
115 {
116 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
117
118 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
119
120 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
121 }
122 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
123
124 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
125 u8 *ports, int *num_disabled)
126 {
127 int i;
128
129 *num_disabled = 0;
130 for (i = 0; i < num_ports; i++) {
131 if (!tracker->netdev_state[i].tx_enabled ||
132 !tracker->netdev_state[i].link_up)
133 ports[(*num_disabled)++] = i;
134 }
135 }
136
137 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
138 u8 *ports, int *num_enabled)
139 {
140 int i;
141
142 *num_enabled = 0;
143 for (i = 0; i < num_ports; i++) {
144 if (tracker->netdev_state[i].tx_enabled &&
145 tracker->netdev_state[i].link_up)
146 ports[(*num_enabled)++] = i;
147 }
148
149 if (*num_enabled == 0)
150 mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
151 }
152
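/* Log the active mapping: for hash-based LAG, the list of enabled ports;
 * otherwise the per-bucket port map used for queue affinity.
 */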
153 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
154 struct mlx5_lag *ldev,
155 struct lag_tracker *tracker,
156 unsigned long flags)
157 {
158 char buf[MLX5_MAX_PORTS * 10 + 1] = {};
159 u8 enabled_ports[MLX5_MAX_PORTS] = {};
160 int written = 0;
161 int num_enabled;
162 int idx;
163 int err;
164 int i;
165 int j;
166
167 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
168 mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
169 &num_enabled);
170 for (i = 0; i < num_enabled; i++) {
171 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
172 if (err != 3)
173 return;
174 written += err;
175 }
176 buf[written - 2] = 0;
177 mlx5_core_info(dev, "lag map active ports: %s\n", buf);
178 } else {
179 for (i = 0; i < ldev->ports; i++) {
180 for (j = 0; j < ldev->buckets; j++) {
181 idx = i * ldev->buckets + j;
182 err = scnprintf(buf + written, 10,
183 " port %d:%d", i + 1, ldev->v2p_map[idx]);
184 if (err != 9)
185 return;
186 written += err;
187 }
188 }
189 mlx5_core_info(dev, "lag map:%s\n", buf);
190 }
191 }
192
193 static int mlx5_lag_netdev_event(struct notifier_block *this,
194 unsigned long event, void *ptr);
195 static void mlx5_do_bond_work(struct work_struct *work);
196
197 static void mlx5_ldev_free(struct kref *ref)
198 {
199 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
200
201 if (ldev->nb.notifier_call)
202 unregister_netdevice_notifier_net(&init_net, &ldev->nb);
203 mlx5_lag_mp_cleanup(ldev);
204 mlx5_lag_mpesw_cleanup(ldev);
205 cancel_work_sync(&ldev->mpesw_work);
206 destroy_workqueue(ldev->wq);
207 mutex_destroy(&ldev->lock);
208 kfree(ldev);
209 }
210
211 static void mlx5_ldev_put(struct mlx5_lag *ldev)
212 {
213 kref_put(&ldev->ref, mlx5_ldev_free);
214 }
215
216 static void mlx5_ldev_get(struct mlx5_lag *ldev)
217 {
218 kref_get(&ldev->ref);
219 }
220
221 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
222 {
223 struct mlx5_lag *ldev;
224 int err;
225
226 ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
227 if (!ldev)
228 return NULL;
229
230 ldev->wq = create_singlethread_workqueue("mlx5_lag");
231 if (!ldev->wq) {
232 kfree(ldev);
233 return NULL;
234 }
235
236 kref_init(&ldev->ref);
237 mutex_init(&ldev->lock);
238 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
239
240 ldev->nb.notifier_call = mlx5_lag_netdev_event;
241 if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
242 ldev->nb.notifier_call = NULL;
243 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
244 }
245 ldev->mode = MLX5_LAG_MODE_NONE;
246
247 err = mlx5_lag_mp_init(ldev);
248 if (err)
249 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
250 err);
251
252 mlx5_lag_mpesw_init(ldev);
253 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
254 ldev->buckets = 1;
255
256 return ldev;
257 }
258
259 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
260 struct net_device *ndev)
261 {
262 int i;
263
264 for (i = 0; i < ldev->ports; i++)
265 if (ldev->pf[i].netdev == ndev)
266 return i;
267
268 return -ENOENT;
269 }
270
271 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
272 {
273 return ldev->mode == MLX5_LAG_MODE_ROCE;
274 }
275
276 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
277 {
278 return ldev->mode == MLX5_LAG_MODE_SRIOV;
279 }
280
281 /* Create a mapping between steering slots and active ports.
282 * As we have ldev->buckets slots per port, first assume the native
283 * mapping should be used.
284 * If there are ports that are disabled, fill the relevant slots
285 * with a mapping that points to active ports.
286 */
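/* Illustrative example: with two ports and one bucket per port, the native
 * map is {1, 2}; if port 2 is down or tx-disabled, its slot is remapped to
 * the remaining active port, giving {1, 1}.
 */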
287 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
288 u8 num_ports,
289 u8 buckets,
290 u8 *ports)
291 {
292 int disabled[MLX5_MAX_PORTS] = {};
293 int enabled[MLX5_MAX_PORTS] = {};
294 int disabled_ports_num = 0;
295 int enabled_ports_num = 0;
296 int idx;
297 u32 rand;
298 int i;
299 int j;
300
301 for (i = 0; i < num_ports; i++) {
302 if (tracker->netdev_state[i].tx_enabled &&
303 tracker->netdev_state[i].link_up)
304 enabled[enabled_ports_num++] = i;
305 else
306 disabled[disabled_ports_num++] = i;
307 }
308
309 /* Use native mapping by default where each port's buckets
310 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
311 */
312 for (i = 0; i < num_ports; i++)
313 for (j = 0; j < buckets; j++) {
314 idx = i * buckets + j;
315 ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
316 }
317
318 /* If all ports are disabled/enabled keep native mapping */
319 if (enabled_ports_num == num_ports ||
320 disabled_ports_num == num_ports)
321 return;
322
323 /* Go over the disabled ports and for each assign a random active port */
324 for (i = 0; i < disabled_ports_num; i++) {
325 for (j = 0; j < buckets; j++) {
326 get_random_bytes(&rand, 4);
327 ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
328 }
329 }
330 }
331
332 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
333 {
334 int i;
335
336 for (i = 0; i < ldev->ports; i++)
337 if (ldev->pf[i].has_drop)
338 return true;
339 return false;
340 }
341
342 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
343 {
344 int i;
345
346 for (i = 0; i < ldev->ports; i++) {
347 if (!ldev->pf[i].has_drop)
348 continue;
349
350 mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
351 MLX5_VPORT_UPLINK);
352 ldev->pf[i].has_drop = false;
353 }
354 }
355
356 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
357 struct lag_tracker *tracker)
358 {
359 u8 disabled_ports[MLX5_MAX_PORTS] = {};
360 struct mlx5_core_dev *dev;
361 int disabled_index;
362 int num_disabled;
363 int err;
364 int i;
365
366 /* First delete the current drop rule so there won't be any dropped
367 * packets
368 */
369 mlx5_lag_drop_rule_cleanup(ldev);
370
371 if (!ldev->tracker.has_inactive)
372 return;
373
374 mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);
375
376 for (i = 0; i < num_disabled; i++) {
377 disabled_index = disabled_ports[i];
378 dev = ldev->pf[disabled_index].dev;
379 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
380 MLX5_VPORT_UPLINK);
381 if (!err)
382 ldev->pf[disabled_index].has_drop = true;
383 else
384 mlx5_core_err(dev,
385 "Failed to create lag drop rule, error: %d", err);
386 }
387 }
388
389 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
390 {
391 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
392
393 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags))
394 return mlx5_lag_port_sel_modify(ldev, ports);
395 return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
396 }
397
398 void mlx5_modify_lag(struct mlx5_lag *ldev,
399 struct lag_tracker *tracker)
400 {
401 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
402 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
403 int idx;
404 int err;
405 int i;
406 int j;
407
408 mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);
409
410 for (i = 0; i < ldev->ports; i++) {
411 for (j = 0; j < ldev->buckets; j++) {
412 idx = i * ldev->buckets + j;
413 if (ports[idx] == ldev->v2p_map[idx])
414 continue;
415 err = _mlx5_modify_lag(ldev, ports);
416 if (err) {
417 mlx5_core_err(dev0,
418 "Failed to modify LAG (%d)\n",
419 err);
420 return;
421 }
422 memcpy(ldev->v2p_map, ports, sizeof(ports));
423
424 mlx5_lag_print_mapping(dev0, ldev, tracker,
425 ldev->mode_flags);
426 break;
427 }
428 }
429
430 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
431 !(ldev->mode == MLX5_LAG_MODE_ROCE))
432 mlx5_lag_drop_rule_setup(ldev, tracker);
433 }
434
435 #define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4
436 static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
437 unsigned long *flags)
438 {
439 struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
440
441 if (ldev->ports == MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED) {
442 /* Four ports are supported only in hash mode */
443 if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table))
444 return -EINVAL;
445 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
446 if (ldev->ports > 2)
447 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
448 }
449
450 return 0;
451 }
452
453 static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
454 struct lag_tracker *tracker,
455 enum mlx5_lag_mode mode,
456 unsigned long *flags)
457 {
458 struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
459
460 if (mode == MLX5_LAG_MODE_MPESW)
461 return;
462
463 if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
464 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
465 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
466 }
467
468 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
469 struct lag_tracker *tracker, bool shared_fdb,
470 unsigned long *flags)
471 {
472 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
473
474 *flags = 0;
475 if (shared_fdb) {
476 set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
477 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
478 }
479
480 if (mode == MLX5_LAG_MODE_MPESW)
481 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
482
483 if (roce_lag)
484 return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
485
486 mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
487 return 0;
488 }
489
490 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
491 {
492 int port_sel_mode = get_port_sel_mode(mode, flags);
493
494 switch (port_sel_mode) {
495 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
496 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
497 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
498 default: return "invalid";
499 }
500 }
501
502 static int mlx5_create_lag(struct mlx5_lag *ldev,
503 struct lag_tracker *tracker,
504 enum mlx5_lag_mode mode,
505 unsigned long flags)
506 {
507 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
508 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
509 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
510 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
511 int err;
512
513 if (tracker)
514 mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
515 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
516 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
517
518 err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
519 if (err) {
520 mlx5_core_err(dev0,
521 "Failed to create LAG (%d)\n",
522 err);
523 return err;
524 }
525
526 if (shared_fdb) {
527 err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
528 dev1->priv.eswitch);
529 if (err)
530 mlx5_core_err(dev0, "Can't enable single FDB mode\n");
531 else
532 mlx5_core_info(dev0, "Operation mode is single FDB\n");
533 }
534
535 if (err) {
536 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
537 if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
538 mlx5_core_err(dev0,
539 "Failed to deactivate RoCE LAG; driver restart required\n");
540 }
541
542 return err;
543 }
544
545 int mlx5_activate_lag(struct mlx5_lag *ldev,
546 struct lag_tracker *tracker,
547 enum mlx5_lag_mode mode,
548 bool shared_fdb)
549 {
550 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
551 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
552 unsigned long flags = 0;
553 int err;
554
555 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
556 if (err)
557 return err;
558
559 if (mode != MLX5_LAG_MODE_MPESW) {
560 mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
561 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
562 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
563 ldev->v2p_map);
564 if (err) {
565 mlx5_core_err(dev0,
566 "Failed to create LAG port selection(%d)\n",
567 err);
568 return err;
569 }
570 }
571 }
572
573 err = mlx5_create_lag(ldev, tracker, mode, flags);
574 if (err) {
575 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
576 mlx5_lag_port_sel_destroy(ldev);
577 if (roce_lag)
578 mlx5_core_err(dev0,
579 "Failed to activate RoCE LAG\n");
580 else
581 mlx5_core_err(dev0,
582 "Failed to activate VF LAG\n"
583 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
584 return err;
585 }
586
587 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
588 !roce_lag)
589 mlx5_lag_drop_rule_setup(ldev, tracker);
590
591 ldev->mode = mode;
592 ldev->mode_flags = flags;
593 return 0;
594 }
595
596 static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
597 {
598 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
599 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
600 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
601 bool roce_lag = __mlx5_lag_is_roce(ldev);
602 unsigned long flags = ldev->mode_flags;
603 int err;
604
605 ldev->mode = MLX5_LAG_MODE_NONE;
606 ldev->mode_flags = 0;
607 mlx5_lag_mp_reset(ldev);
608
609 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
610 mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
611 dev1->priv.eswitch);
612 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
613 }
614
615 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
616 err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
617 if (err) {
618 if (roce_lag) {
619 mlx5_core_err(dev0,
620 "Failed to deactivate RoCE LAG; driver restart required\n");
621 } else {
622 mlx5_core_err(dev0,
623 "Failed to deactivate VF LAG; driver restart required\n"
624 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
625 }
626 return err;
627 }
628
629 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
630 mlx5_lag_port_sel_destroy(ldev);
631 if (mlx5_lag_has_drop_rule(ldev))
632 mlx5_lag_drop_rule_cleanup(ldev);
633
634 return 0;
635 }
636
637 #define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
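/* LAG prerequisites: every slot must hold a device and, with eswitch support,
 * all devices must share the same eswitch mode (none or offloads), where
 * offloads is limited to MLX5_LAG_OFFLOADS_SUPPORTED_PORTS ports.
 */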
638 static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
639 {
640 #ifdef CONFIG_MLX5_ESWITCH
641 u8 mode;
642 #endif
643 int i;
644
645 for (i = 0; i < ldev->ports; i++)
646 if (!ldev->pf[i].dev)
647 return false;
648
649 #ifdef CONFIG_MLX5_ESWITCH
650 mode = mlx5_eswitch_mode(ldev->pf[MLX5_LAG_P1].dev);
651
652 if (mode != MLX5_ESWITCH_NONE && mode != MLX5_ESWITCH_OFFLOADS)
653 return false;
654
655 for (i = 0; i < ldev->ports; i++)
656 if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
657 return false;
658
659 if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
660 return false;
661 #else
662 for (i = 0; i < ldev->ports; i++)
663 if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
664 return false;
665 #endif
666 return true;
667 }
668
669 static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
670 {
671 int i;
672
673 for (i = 0; i < ldev->ports; i++) {
674 if (!ldev->pf[i].dev)
675 continue;
676
677 if (ldev->pf[i].dev->priv.flags &
678 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
679 continue;
680
681 ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
682 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
683 }
684 }
685
686 static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
687 {
688 int i;
689
690 for (i = 0; i < ldev->ports; i++) {
691 if (!ldev->pf[i].dev)
692 continue;
693
694 if (ldev->pf[i].dev->priv.flags &
695 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
696 continue;
697
698 ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
699 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
700 }
701 }
702
703 void mlx5_disable_lag(struct mlx5_lag *ldev)
704 {
705 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
706 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
707 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
708 bool roce_lag;
709 int err;
710 int i;
711
712 roce_lag = __mlx5_lag_is_roce(ldev);
713
714 if (shared_fdb) {
715 mlx5_lag_remove_devices(ldev);
716 } else if (roce_lag) {
717 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
718 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
719 mlx5_rescan_drivers_locked(dev0);
720 }
721 for (i = 1; i < ldev->ports; i++)
722 mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
723 }
724
725 err = mlx5_deactivate_lag(ldev);
726 if (err)
727 return;
728
729 if (shared_fdb || roce_lag)
730 mlx5_lag_add_devices(ldev);
731
732 if (shared_fdb) {
733 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
734 mlx5_eswitch_reload_reps(dev0->priv.eswitch);
735 if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
736 mlx5_eswitch_reload_reps(dev1->priv.eswitch);
737 }
738 }
739
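/* Shared FDB requires both devices in switchdev mode with vport match
 * metadata enabled, paired eswitch offloads over devcom, and FW support for
 * native FDB selection, a root FT on the other eswitch and a shared ingress
 * ACL.
 */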
740 bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
741 {
742 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
743 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
744
745 if (is_mdev_switchdev_mode(dev0) &&
746 is_mdev_switchdev_mode(dev1) &&
747 mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
748 mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
749 mlx5_devcom_is_paired(dev0->priv.devcom,
750 MLX5_DEVCOM_ESW_OFFLOADS) &&
751 MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
752 MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
753 MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
754 return true;
755
756 return false;
757 }
758
759 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
760 {
761 bool roce_lag = true;
762 int i;
763
764 for (i = 0; i < ldev->ports; i++)
765 roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
766
767 #ifdef CONFIG_MLX5_ESWITCH
768 for (i = 0; i < ldev->ports; i++)
769 roce_lag = roce_lag &&
770 ldev->pf[i].dev->priv.eswitch->mode == MLX5_ESWITCH_NONE;
771 #endif
772
773 return roce_lag;
774 }
775
776 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
777 {
778 return do_bond && __mlx5_lag_is_active(ldev) &&
779 ldev->mode != MLX5_LAG_MODE_MPESW;
780 }
781
782 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
783 {
784 return !do_bond && __mlx5_lag_is_active(ldev) &&
785 ldev->mode != MLX5_LAG_MODE_MPESW;
786 }
787
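/* Re-evaluate the bond: create the hardware LAG when the tracker reports a
 * usable bond (RoCE LAG if no SR-IOV/eswitch is active, otherwise VF LAG,
 * optionally with a shared FDB), update the port mapping on changes, or tear
 * the LAG down when the bond is gone.
 */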
788 static void mlx5_do_bond(struct mlx5_lag *ldev)
789 {
790 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
791 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
792 struct lag_tracker tracker = { };
793 bool do_bond, roce_lag;
794 int err;
795 int i;
796
797 if (!mlx5_lag_is_ready(ldev)) {
798 do_bond = false;
799 } else {
800 /* VF LAG is in multipath mode, ignore bond change requests */
801 if (mlx5_lag_is_multipath(dev0))
802 return;
803
804 tracker = ldev->tracker;
805
806 do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
807 }
808
809 if (do_bond && !__mlx5_lag_is_active(ldev)) {
810 bool shared_fdb = mlx5_shared_fdb_supported(ldev);
811
812 roce_lag = mlx5_lag_is_roce_lag(ldev);
813
814 if (shared_fdb || roce_lag)
815 mlx5_lag_remove_devices(ldev);
816
817 err = mlx5_activate_lag(ldev, &tracker,
818 roce_lag ? MLX5_LAG_MODE_ROCE :
819 MLX5_LAG_MODE_SRIOV,
820 shared_fdb);
821 if (err) {
822 if (shared_fdb || roce_lag)
823 mlx5_lag_add_devices(ldev);
824
825 return;
826 } else if (roce_lag) {
827 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
828 mlx5_rescan_drivers_locked(dev0);
829 for (i = 1; i < ldev->ports; i++)
830 mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
831 } else if (shared_fdb) {
832 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
833 mlx5_rescan_drivers_locked(dev0);
834
835 err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
836 if (!err)
837 err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
838
839 if (err) {
840 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
841 mlx5_rescan_drivers_locked(dev0);
842 mlx5_deactivate_lag(ldev);
843 mlx5_lag_add_devices(ldev);
844 mlx5_eswitch_reload_reps(dev0->priv.eswitch);
845 mlx5_eswitch_reload_reps(dev1->priv.eswitch);
846 mlx5_core_err(dev0, "Failed to enable lag\n");
847 return;
848 }
849 }
850 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
851 mlx5_modify_lag(ldev, &tracker);
852 } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
853 mlx5_disable_lag(ldev);
854 }
855 }
856
857 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
858 {
859 queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
860 }
861
862 static void mlx5_do_bond_work(struct work_struct *work)
863 {
864 struct delayed_work *delayed_work = to_delayed_work(work);
865 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
866 bond_work);
867 int status;
868
869 status = mlx5_dev_list_trylock();
870 if (!status) {
871 mlx5_queue_bond_work(ldev, HZ);
872 return;
873 }
874
875 mutex_lock(&ldev->lock);
876 if (ldev->mode_changes_in_progress) {
877 mutex_unlock(&ldev->lock);
878 mlx5_dev_list_unlock();
879 mlx5_queue_bond_work(ldev, HZ);
880 return;
881 }
882
883 mlx5_do_bond(ldev);
884 mutex_unlock(&ldev->lock);
885 mlx5_dev_list_unlock();
886 }
887
888 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
889 struct lag_tracker *tracker,
890 struct netdev_notifier_changeupper_info *info)
891 {
892 struct net_device *upper = info->upper_dev, *ndev_tmp;
893 struct netdev_lag_upper_info *lag_upper_info = NULL;
894 bool is_bonded, is_in_lag, mode_supported;
895 bool has_inactive = 0;
896 struct slave *slave;
897 u8 bond_status = 0;
898 int num_slaves = 0;
899 int changed = 0;
900 int idx;
901
902 if (!netif_is_lag_master(upper))
903 return 0;
904
905 if (info->linking)
906 lag_upper_info = info->upper_info;
907
908 /* The event may still be of interest if the slave does not belong to
909 * us, but is enslaved to a master which has one or more of our netdevs
910 * as slaves (e.g., if a new slave is added to a master that bonds two
911 * of our netdevs, we should unbond).
912 */
913 rcu_read_lock();
914 for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
915 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
916 if (idx >= 0) {
917 slave = bond_slave_get_rcu(ndev_tmp);
918 if (slave)
919 has_inactive |= bond_is_slave_inactive(slave);
920 bond_status |= (1 << idx);
921 }
922
923 num_slaves++;
924 }
925 rcu_read_unlock();
926
927 /* None of this lagdev's netdevs are slaves of this master. */
928 if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
929 return 0;
930
931 if (lag_upper_info) {
932 tracker->tx_type = lag_upper_info->tx_type;
933 tracker->hash_type = lag_upper_info->hash_type;
934 }
935
936 tracker->has_inactive = has_inactive;
937 /* Determine bonding status:
938 * A device is considered bonded if all of its physical ports are slaves
939 * of the same lag master, and the master has no other slaves.
940 */
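/* Illustrative example: on a 2-port device, is_in_lag requires the bond to
 * have exactly two slaves and bond_status == GENMASK(1, 0) == 0x3.
 */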
941 is_in_lag = num_slaves == ldev->ports &&
942 bond_status == GENMASK(ldev->ports - 1, 0);
943
944 /* Lag mode must be activebackup or hash. */
945 mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
946 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
947
948 is_bonded = is_in_lag && mode_supported;
949 if (tracker->is_bonded != is_bonded) {
950 tracker->is_bonded = is_bonded;
951 changed = 1;
952 }
953
954 if (!is_in_lag)
955 return changed;
956
957 if (!mlx5_lag_is_ready(ldev))
958 NL_SET_ERR_MSG_MOD(info->info.extack,
959 "Can't activate LAG offload, PF is configured with more than 64 VFs");
960 else if (!mode_supported)
961 NL_SET_ERR_MSG_MOD(info->info.extack,
962 "Can't activate LAG offload, TX type isn't supported");
963
964 return changed;
965 }
966
967 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
968 struct lag_tracker *tracker,
969 struct net_device *ndev,
970 struct netdev_notifier_changelowerstate_info *info)
971 {
972 struct netdev_lag_lower_state_info *lag_lower_info;
973 int idx;
974
975 if (!netif_is_lag_port(ndev))
976 return 0;
977
978 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
979 if (idx < 0)
980 return 0;
981
982 /* This information is used to determine virtual to physical
983 * port mapping.
984 */
985 lag_lower_info = info->lower_state_info;
986 if (!lag_lower_info)
987 return 0;
988
989 tracker->netdev_state[idx] = *lag_lower_info;
990
991 return 1;
992 }
993
994 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
995 struct lag_tracker *tracker,
996 struct net_device *ndev)
997 {
998 struct net_device *ndev_tmp;
999 struct slave *slave;
1000 bool has_inactive = 0;
1001 int idx;
1002
1003 if (!netif_is_lag_master(ndev))
1004 return 0;
1005
1006 rcu_read_lock();
1007 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1008 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1009 if (idx < 0)
1010 continue;
1011
1012 slave = bond_slave_get_rcu(ndev_tmp);
1013 if (slave)
1014 has_inactive |= bond_is_slave_inactive(slave);
1015 }
1016 rcu_read_unlock();
1017
1018 if (tracker->has_inactive == has_inactive)
1019 return 0;
1020
1021 tracker->has_inactive = has_inactive;
1022
1023 return 1;
1024 }
1025
1026 /* this handler is always registered to netdev events */
1027 static int mlx5_lag_netdev_event(struct notifier_block *this,
1028 unsigned long event, void *ptr)
1029 {
1030 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1031 struct lag_tracker tracker;
1032 struct mlx5_lag *ldev;
1033 int changed = 0;
1034
1035 if (event != NETDEV_CHANGEUPPER &&
1036 event != NETDEV_CHANGELOWERSTATE &&
1037 event != NETDEV_CHANGEINFODATA)
1038 return NOTIFY_DONE;
1039
1040 ldev = container_of(this, struct mlx5_lag, nb);
1041
1042 tracker = ldev->tracker;
1043
1044 switch (event) {
1045 case NETDEV_CHANGEUPPER:
1046 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1047 break;
1048 case NETDEV_CHANGELOWERSTATE:
1049 changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1050 ndev, ptr);
1051 break;
1052 case NETDEV_CHANGEINFODATA:
1053 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1054 break;
1055 }
1056
1057 ldev->tracker = tracker;
1058
1059 if (changed)
1060 mlx5_queue_bond_work(ldev, 0);
1061
1062 return NOTIFY_DONE;
1063 }
1064
1065 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1066 struct mlx5_core_dev *dev,
1067 struct net_device *netdev)
1068 {
1069 unsigned int fn = mlx5_get_dev_index(dev);
1070 unsigned long flags;
1071
1072 if (fn >= ldev->ports)
1073 return;
1074
1075 spin_lock_irqsave(&lag_lock, flags);
1076 ldev->pf[fn].netdev = netdev;
1077 ldev->tracker.netdev_state[fn].link_up = 0;
1078 ldev->tracker.netdev_state[fn].tx_enabled = 0;
1079 spin_unlock_irqrestore(&lag_lock, flags);
1080 }
1081
1082 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1083 struct net_device *netdev)
1084 {
1085 unsigned long flags;
1086 int i;
1087
1088 spin_lock_irqsave(&lag_lock, flags);
1089 for (i = 0; i < ldev->ports; i++) {
1090 if (ldev->pf[i].netdev == netdev) {
1091 ldev->pf[i].netdev = NULL;
1092 break;
1093 }
1094 }
1095 spin_unlock_irqrestore(&lag_lock, flags);
1096 }
1097
1098 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1099 struct mlx5_core_dev *dev)
1100 {
1101 unsigned int fn = mlx5_get_dev_index(dev);
1102
1103 if (fn >= ldev->ports)
1104 return;
1105
1106 ldev->pf[fn].dev = dev;
1107 dev->priv.lag = ldev;
1108 }
1109
1110 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1111 struct mlx5_core_dev *dev)
1112 {
1113 int i;
1114
1115 for (i = 0; i < ldev->ports; i++)
1116 if (ldev->pf[i].dev == dev)
1117 break;
1118
1119 if (i == ldev->ports)
1120 return;
1121
1122 ldev->pf[i].dev = NULL;
1123 dev->priv.lag = NULL;
1124 }
1125
1126 /* Must be called with intf_mutex held */
1127 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1128 {
1129 struct mlx5_lag *ldev = NULL;
1130 struct mlx5_core_dev *tmp_dev;
1131
1132 tmp_dev = mlx5_get_next_phys_dev_lag(dev);
1133 if (tmp_dev)
1134 ldev = tmp_dev->priv.lag;
1135
1136 if (!ldev) {
1137 ldev = mlx5_lag_dev_alloc(dev);
1138 if (!ldev) {
1139 mlx5_core_err(dev, "Failed to alloc lag dev\n");
1140 return 0;
1141 }
1142 mlx5_ldev_add_mdev(ldev, dev);
1143 return 0;
1144 }
1145
1146 mutex_lock(&ldev->lock);
1147 if (ldev->mode_changes_in_progress) {
1148 mutex_unlock(&ldev->lock);
1149 return -EAGAIN;
1150 }
1151 mlx5_ldev_get(ldev);
1152 mlx5_ldev_add_mdev(ldev, dev);
1153 mutex_unlock(&ldev->lock);
1154
1155 return 0;
1156 }
1157
1158 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1159 {
1160 struct mlx5_lag *ldev;
1161
1162 ldev = mlx5_lag_dev(dev);
1163 if (!ldev)
1164 return;
1165
1166 /* mdev is being removed, might as well remove debugfs
1167 * as early as possible.
1168 */
1169 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1170 recheck:
1171 mutex_lock(&ldev->lock);
1172 if (ldev->mode_changes_in_progress) {
1173 mutex_unlock(&ldev->lock);
1174 msleep(100);
1175 goto recheck;
1176 }
1177 mlx5_ldev_remove_mdev(ldev, dev);
1178 mutex_unlock(&ldev->lock);
1179 mlx5_ldev_put(ldev);
1180 }
1181
1182 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1183 {
1184 int err;
1185
1186 if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
1187 !MLX5_CAP_GEN(dev, lag_master) ||
1188 (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
1189 MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
1190 return;
1191
1192 recheck:
1193 mlx5_dev_list_lock();
1194 err = __mlx5_lag_dev_add_mdev(dev);
1195 mlx5_dev_list_unlock();
1196
1197 if (err) {
1198 msleep(100);
1199 goto recheck;
1200 }
1201 mlx5_ldev_add_debugfs(dev);
1202 }
1203
1204 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1205 struct net_device *netdev)
1206 {
1207 struct mlx5_lag *ldev;
1208 bool lag_is_active;
1209
1210 ldev = mlx5_lag_dev(dev);
1211 if (!ldev)
1212 return;
1213
1214 mutex_lock(&ldev->lock);
1215 mlx5_ldev_remove_netdev(ldev, netdev);
1216 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1217
1218 lag_is_active = __mlx5_lag_is_active(ldev);
1219 mutex_unlock(&ldev->lock);
1220
1221 if (lag_is_active)
1222 mlx5_queue_bond_work(ldev, 0);
1223 }
1224
1225 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1226 struct net_device *netdev)
1227 {
1228 struct mlx5_lag *ldev;
1229 int i;
1230
1231 ldev = mlx5_lag_dev(dev);
1232 if (!ldev)
1233 return;
1234
1235 mutex_lock(&ldev->lock);
1236 mlx5_ldev_add_netdev(ldev, dev, netdev);
1237
1238 for (i = 0; i < ldev->ports; i++)
1239 if (!ldev->pf[i].netdev)
1240 break;
1241
1242 if (i >= ldev->ports)
1243 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1244 mutex_unlock(&ldev->lock);
1245 mlx5_queue_bond_work(ldev, 0);
1246 }
1247
1248 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1249 {
1250 struct mlx5_lag *ldev;
1251 unsigned long flags;
1252 bool res;
1253
1254 spin_lock_irqsave(&lag_lock, flags);
1255 ldev = mlx5_lag_dev(dev);
1256 res = ldev && __mlx5_lag_is_roce(ldev);
1257 spin_unlock_irqrestore(&lag_lock, flags);
1258
1259 return res;
1260 }
1261 EXPORT_SYMBOL(mlx5_lag_is_roce);
1262
1263 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1264 {
1265 struct mlx5_lag *ldev;
1266 unsigned long flags;
1267 bool res;
1268
1269 spin_lock_irqsave(&lag_lock, flags);
1270 ldev = mlx5_lag_dev(dev);
1271 res = ldev && __mlx5_lag_is_active(ldev);
1272 spin_unlock_irqrestore(&lag_lock, flags);
1273
1274 return res;
1275 }
1276 EXPORT_SYMBOL(mlx5_lag_is_active);
1277
1278 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1279 {
1280 struct mlx5_lag *ldev;
1281 unsigned long flags;
1282 bool res;
1283
1284 spin_lock_irqsave(&lag_lock, flags);
1285 ldev = mlx5_lag_dev(dev);
1286 res = ldev && __mlx5_lag_is_active(ldev) &&
1287 dev == ldev->pf[MLX5_LAG_P1].dev;
1288 spin_unlock_irqrestore(&lag_lock, flags);
1289
1290 return res;
1291 }
1292 EXPORT_SYMBOL(mlx5_lag_is_master);
1293
1294 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1295 {
1296 struct mlx5_lag *ldev;
1297 unsigned long flags;
1298 bool res;
1299
1300 spin_lock_irqsave(&lag_lock, flags);
1301 ldev = mlx5_lag_dev(dev);
1302 res = ldev && __mlx5_lag_is_sriov(ldev);
1303 spin_unlock_irqrestore(&lag_lock, flags);
1304
1305 return res;
1306 }
1307 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1308
1309 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1310 {
1311 struct mlx5_lag *ldev;
1312 unsigned long flags;
1313 bool res;
1314
1315 spin_lock_irqsave(&lag_lock, flags);
1316 ldev = mlx5_lag_dev(dev);
1317 res = ldev && __mlx5_lag_is_sriov(ldev) &&
1318 test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1319 spin_unlock_irqrestore(&lag_lock, flags);
1320
1321 return res;
1322 }
1323 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1324
1325 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1326 {
1327 struct mlx5_lag *ldev;
1328
1329 ldev = mlx5_lag_dev(dev);
1330 if (!ldev)
1331 return;
1332
1333 mlx5_dev_list_lock();
1334 mutex_lock(&ldev->lock);
1335
1336 ldev->mode_changes_in_progress++;
1337 if (__mlx5_lag_is_active(ldev))
1338 mlx5_disable_lag(ldev);
1339
1340 mutex_unlock(&ldev->lock);
1341 mlx5_dev_list_unlock();
1342 }
1343
1344 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1345 {
1346 struct mlx5_lag *ldev;
1347
1348 ldev = mlx5_lag_dev(dev);
1349 if (!ldev)
1350 return;
1351
1352 mutex_lock(&ldev->lock);
1353 ldev->mode_changes_in_progress--;
1354 mutex_unlock(&ldev->lock);
1355 mlx5_queue_bond_work(ldev, 0);
1356 }
1357
1358 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
1359 {
1360 struct net_device *ndev = NULL;
1361 struct mlx5_lag *ldev;
1362 unsigned long flags;
1363 int i;
1364
1365 spin_lock_irqsave(&lag_lock, flags);
1366 ldev = mlx5_lag_dev(dev);
1367
1368 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1369 goto unlock;
1370
1371 if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1372 for (i = 0; i < ldev->ports; i++)
1373 if (ldev->tracker.netdev_state[i].tx_enabled)
1374 ndev = ldev->pf[i].netdev;
1375 if (!ndev)
1376 ndev = ldev->pf[ldev->ports - 1].netdev;
1377 } else {
1378 ndev = ldev->pf[MLX5_LAG_P1].netdev;
1379 }
1380 if (ndev)
1381 dev_hold(ndev);
1382
1383 unlock:
1384 spin_unlock_irqrestore(&lag_lock, flags);
1385
1386 return ndev;
1387 }
1388 EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
1389
1390 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1391 struct net_device *slave)
1392 {
1393 struct mlx5_lag *ldev;
1394 unsigned long flags;
1395 u8 port = 0;
1396 int i;
1397
1398 spin_lock_irqsave(&lag_lock, flags);
1399 ldev = mlx5_lag_dev(dev);
1400 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1401 goto unlock;
1402
1403 for (i = 0; i < ldev->ports; i++) {
1404 if (ldev->pf[i].netdev == slave) {
1405 port = i;
1406 break;
1407 }
1408 }
1409
1410 port = ldev->v2p_map[port * ldev->buckets];
1411
1412 unlock:
1413 spin_unlock_irqrestore(&lag_lock, flags);
1414 return port;
1415 }
1416 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1417
1418 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1419 {
1420 struct mlx5_lag *ldev;
1421
1422 ldev = mlx5_lag_dev(dev);
1423 if (!ldev)
1424 return 0;
1425
1426 return ldev->ports;
1427 }
1428 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1429
1430 struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
1431 {
1432 struct mlx5_core_dev *peer_dev = NULL;
1433 struct mlx5_lag *ldev;
1434 unsigned long flags;
1435
1436 spin_lock_irqsave(&lag_lock, flags);
1437 ldev = mlx5_lag_dev(dev);
1438 if (!ldev)
1439 goto unlock;
1440
1441 peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
1442 ldev->pf[MLX5_LAG_P2].dev :
1443 ldev->pf[MLX5_LAG_P1].dev;
1444
1445 unlock:
1446 spin_unlock_irqrestore(&lag_lock, flags);
1447 return peer_dev;
1448 }
1449 EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
1450
1451 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1452 u64 *values,
1453 int num_counters,
1454 size_t *offsets)
1455 {
1456 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1457 struct mlx5_core_dev **mdev;
1458 struct mlx5_lag *ldev;
1459 unsigned long flags;
1460 int num_ports;
1461 int ret, i, j;
1462 void *out;
1463
1464 out = kvzalloc(outlen, GFP_KERNEL);
1465 if (!out)
1466 return -ENOMEM;
1467
1468 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1469 if (!mdev) {
1470 ret = -ENOMEM;
1471 goto free_out;
1472 }
1473
1474 memset(values, 0, sizeof(*values) * num_counters);
1475
1476 spin_lock_irqsave(&lag_lock, flags);
1477 ldev = mlx5_lag_dev(dev);
1478 if (ldev && __mlx5_lag_is_active(ldev)) {
1479 num_ports = ldev->ports;
1480 for (i = 0; i < ldev->ports; i++)
1481 mdev[i] = ldev->pf[i].dev;
1482 } else {
1483 num_ports = 1;
1484 mdev[MLX5_LAG_P1] = dev;
1485 }
1486 spin_unlock_irqrestore(&lag_lock, flags);
1487
1488 for (i = 0; i < num_ports; ++i) {
1489 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
1490
1491 MLX5_SET(query_cong_statistics_in, in, opcode,
1492 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
1493 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
1494 out);
1495 if (ret)
1496 goto free_mdev;
1497
1498 for (j = 0; j < num_counters; ++j)
1499 values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
1500 }
1501
1502 free_mdev:
1503 kvfree(mdev);
1504 free_out:
1505 kvfree(out);
1506 return ret;
1507 }
1508 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
1509