/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <net/bonding.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "lag.h"
#include "mp.h"
#include "mpesw.h"

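/* Egress port values programmed into the device (and kept in
 * ldev->v2p_map) are 1-based, while the pf[] array and the netdev
 * tracker use 0-based port indices.
 */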
enum {
	MLX5_LAG_EGRESS_PORT_1 = 1,
	MLX5_LAG_EGRESS_PORT_2,
};

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

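/* Map the LAG mode and mode flags to the port_select_mode programmed in
 * the LAG context: hash-based steering uses the port selection flow
 * table, MPESW has its own mode, and everything else falls back to
 * queue (TX) affinity.
 */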
static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;

	if (mode == MLX5_LAG_MODE_MPESW)
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;

	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
}

static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
			       unsigned long flags)
{
	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
				     &flags);
	int port_sel_mode = get_port_sel_mode(mode, flags);
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
	if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) {
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
	}
	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

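/* The two helpers below classify ports by bond state: a port counts as
 * enabled only if the bond reports it both tx_enabled and link_up.
 * If no port qualifies, mlx5_infer_tx_enabled() falls back to returning
 * the disabled set so callers never get an empty list.
 */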
static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
				   u8 *ports, int *num_disabled)
{
	int i;

	*num_disabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (!tracker->netdev_state[i].tx_enabled ||
		    !tracker->netdev_state[i].link_up)
			ports[(*num_disabled)++] = i;
	}
}

void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled)
{
	int i;

	*num_enabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			ports[(*num_enabled)++] = i;
	}

	if (*num_enabled == 0)
		mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
}

static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
				   struct mlx5_lag *ldev,
				   struct lag_tracker *tracker,
				   unsigned long flags)
{
	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	int written = 0;
	int num_enabled;
	int idx;
	int err;
	int i;
	int j;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
				      &num_enabled);
		for (i = 0; i < num_enabled; i++) {
			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
			if (err != 3)
				return;
			written += err;
		}
		buf[written - 2] = 0;
		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
	} else {
		for (i = 0; i < ldev->ports; i++) {
			for (j = 0; j < ldev->buckets; j++) {
				idx = i * ldev->buckets + j;
				err = scnprintf(buf + written, 10,
						" port %d:%d", i + 1, ldev->v2p_map[idx]);
				if (err != 9)
					return;
				written += err;
			}
		}
		mlx5_core_info(dev, "lag map:%s\n", buf);
	}
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	mlx5_lag_mpesw_cleanup(ldev);
	cancel_work_sync(&ldev->mpesw_work);
	destroy_workqueue(ldev->wq);
	mutex_destroy(&ldev->lock);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}
	ldev->mode = MLX5_LAG_MODE_NONE;

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	mlx5_lag_mpesw_init(ldev);
	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
	ldev->buckets = 1;

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_ROCE;
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_SRIOV;
}

/* Create a mapping between steering slots and active ports.
 * Since there are ldev->buckets slots per port, first assume the
 * native mapping should be used.
 * If some ports are disabled, fill their slots with a mapping that
 * points to active ports.
 */
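/* Illustrative example: with 2 ports and a single bucket per port the
 * native mapping is { 1, 2 }. If port 2 is down, its slot is remapped
 * to a randomly picked active port, yielding { 1, 1 }.
 */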
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 num_ports,
					   u8 buckets,
					   u8 *ports)
{
	int disabled[MLX5_MAX_PORTS] = {};
	int enabled[MLX5_MAX_PORTS] = {};
	int disabled_ports_num = 0;
	int enabled_ports_num = 0;
	int idx;
	u32 rand;
	int i;
	int j;

	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			enabled[enabled_ports_num++] = i;
		else
			disabled[disabled_ports_num++] = i;
	}

	/* Use native mapping by default where each port's buckets
	 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
	 */
	for (i = 0; i < num_ports; i++)
		for (j = 0; j < buckets; j++) {
			idx = i * buckets + j;
			ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
		}

	/* If all ports are disabled/enabled keep native mapping */
	if (enabled_ports_num == num_ports ||
	    disabled_ports_num == num_ports)
		return;

	/* Go over the disabled ports and for each assign a random active port */
	for (i = 0; i < disabled_ports_num; i++) {
		for (j = 0; j < buckets; j++) {
			get_random_bytes(&rand, 4);
			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
		}
	}
}

static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].has_drop)
			return true;
	return false;
}

static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}

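/* For active-backup LAG, packets received on an inactive (backup) port
 * should not be delivered, so an ingress drop rule is installed on the
 * uplink vport of every inactive port. The rules are rebuilt from
 * scratch on every change: clean up first, then recreate them for the
 * currently inactive ports.
 */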
static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
	u8 disabled_ports[MLX5_MAX_PORTS] = {};
	struct mlx5_core_dev *dev;
	int disabled_index;
	int num_disabled;
	int err;
	int i;

	/* First delete the current drop rule so there won't be any dropped
	 * packets
	 */
	mlx5_lag_drop_rule_cleanup(ldev);

	if (!ldev->tracker.has_inactive)
		return;

	mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);

	for (i = 0; i < num_disabled; i++) {
		disabled_index = disabled_ports[i];
		dev = ldev->pf[disabled_index].dev;
		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
								  MLX5_VPORT_UPLINK);
		if (!err)
			ldev->pf[disabled_index].has_drop = true;
		else
			mlx5_core_err(dev,
				      "Failed to create lag drop rule, error: %d", err);
	}
}

static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags))
		return mlx5_lag_port_sel_modify(ldev, ports);
	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int idx;
	int err;
	int i;
	int j;

	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);

	for (i = 0; i < ldev->ports; i++) {
		for (j = 0; j < ldev->buckets; j++) {
			idx = i * ldev->buckets + j;
			if (ports[idx] == ldev->v2p_map[idx])
				continue;
			err = _mlx5_modify_lag(ldev, ports);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to modify LAG (%d)\n",
					      err);
				return;
			}
			memcpy(ldev->v2p_map, ports, sizeof(ports));

			mlx5_lag_print_mapping(dev0, ldev, tracker,
					       ldev->mode_flags);
			break;
		}
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
		mlx5_lag_drop_rule_setup(ldev, tracker);
}

#define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4
static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
					   unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (ldev->ports == MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED) {
		/* Four ports are supported only in hash mode */
		if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table))
			return -EINVAL;
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
		if (ldev->ports > 2)
			ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
	}

	return 0;
}

static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
						struct lag_tracker *tracker,
						enum mlx5_lag_mode mode,
						unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (mode == MLX5_LAG_MODE_MPESW)
		return;

	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
}

static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;

	*flags = 0;
	if (shared_fdb) {
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
	}

	if (mode == MLX5_LAG_MODE_MPESW)
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

	if (roce_lag)
		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);

	mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
	return 0;
}

char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	int port_sel_mode = get_port_sel_mode(mode, flags);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
	default: return "invalid";
	}
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   enum mlx5_lag_mode mode,
			   unsigned long flags)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	if (tracker)
		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

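/* Activate LAG: derive the mode flags, build the port affinity map
 * (skipped for MPESW), create the port selection flow table when hash
 * based steering is used, issue CREATE_LAG (optionally with a shared
 * FDB), and finally install drop rules for inactive ports in
 * active-backup, non-RoCE configurations.
 */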
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      enum mlx5_lag_mode mode,
		      bool shared_fdb)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	unsigned long flags = 0;
	int err;

	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
	if (err)
		return err;

	if (mode != MLX5_LAG_MODE_MPESW) {
		mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
						       ldev->v2p_map);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to create LAG port selection(%d)\n",
					      err);
				return err;
			}
		}
	}

	err = mlx5_create_lag(ldev, tracker, mode, flags);
	if (err) {
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
			mlx5_lag_port_sel_destroy(ldev);
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !roce_lag)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->mode = mode;
	ldev->mode_flags = flags;
	return 0;
}

static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	unsigned long flags = ldev->mode_flags;
	int err;

	ldev->mode = MLX5_LAG_MODE_NONE;
	ldev->mode_flags = 0;
	mlx5_lag_mp_reset(ldev);

	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
		mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
							 dev1->priv.eswitch);
		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		mlx5_lag_port_sel_destroy(ldev);
	if (mlx5_lag_has_drop_rule(ldev))
		mlx5_lag_drop_rule_cleanup(ldev);

	return 0;
}

#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
	u8 mode;
#endif
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].dev)
			return false;

#ifdef CONFIG_MLX5_ESWITCH
	mode = mlx5_eswitch_mode(ldev->pf[MLX5_LAG_P1].dev);

	if (mode != MLX5_ESWITCH_NONE && mode != MLX5_ESWITCH_OFFLOADS)
		return false;

	for (i = 0; i < ldev->ports; i++)
		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
			return false;

	if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
		return false;
#else
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
			return false;
#endif
	return true;
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool roce_lag;
	int err;
	int i;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		for (i = 1; i < ldev->ports; i++)
			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

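/* Shared FDB is offered only when both PFs are in switchdev mode with
 * vport metadata matching enabled, their eswitches are paired via devcom,
 * and the firmware supports native FDB selection, a root flow table on
 * the other eswitch and a shared ingress ACL.
 */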
bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
{
	bool roce_lag = true;
	int i;

	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag &&
			ldev->pf[i].dev->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

	return roce_lag;
}

static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return !do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

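/* Re-evaluate the bonding state: activate LAG (RoCE or SRIOV mode,
 * optionally with a shared FDB) when the tracked bond becomes eligible,
 * modify the port mapping while it is already active, or tear it down
 * when the bond no longer qualifies. Multipath LAG is handled separately
 * (see mp.c) and is left untouched here.
 */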
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;
	int i;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_MODE_ROCE :
						   MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			for (i = 1; i < ldev->ports; i++)
				mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

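/* Deferred bond re-evaluation. The device-list lock is only trylock'ed
 * so the worker does not block against device add/remove; if it is
 * contended, or a mode change is in progress, the work is requeued one
 * second (HZ) later.
 */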
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

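/* Handle NETDEV_CHANGEUPPER for a bond that may aggregate our netdevs.
 * Updates the tracker and returns 1 when the tracked bonding state
 * changed (so the bond work should be scheduled), 0 otherwise.
 */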
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if all of its physical ports are
	 * slaves of the same LAG master, and that master has no other
	 * slaves.
	 */
	is_in_lag = num_slaves == ldev->ports &&
		bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}

/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev    = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = mlx5_get_dev_index(dev);
	unsigned long flags;

	if (fn >= ldev->ports)
		return;

	spin_lock_irqsave(&lag_lock, flags);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= ldev->ports)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == ldev->ports)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	tmp_dev = mlx5_get_next_phys_dev_lag(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
		mlx5_ldev_add_mdev(ldev, dev);
		return 0;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		return -EAGAIN;
	}
	mlx5_ldev_get(ldev);
	mlx5_ldev_add_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	/* mdev is being removed, might as well remove debugfs
	 * as early as possible.
	 */
	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);
	mlx5_ldev_put(ldev);
}

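/* Attach a core device to a LAG context. LAG is only considered for
 * devices that are vport group managers with the lag_master capability
 * and between 2 and MLX5_MAX_PORTS LAG ports; the add is retried while
 * a mode change is in progress on an existing context.
 */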
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
	     MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
		return;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	mlx5_dev_list_unlock();

	if (err) {
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_add_debugfs(dev);
}

void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	bool lag_is_active;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_remove_netdev(ldev, netdev);
	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);

	lag_is_active = __mlx5_lag_is_active(ldev);
	mutex_unlock(&ldev->lock);

	if (lag_is_active)
		mlx5_queue_bond_work(ldev, 0);
}

void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].netdev)
			break;

	if (i >= ldev->ports)
		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

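/* The exported state queries below sample the LAG state under the
 * global lag_lock spinlock, so they can safely race with LAG activation
 * and teardown.
 */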
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
		dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) &&
	      test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev))
		mlx5_disable_lag(ldev);

	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	ldev->mode_changes_in_progress--;
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		for (i = 0; i < ldev->ports; i++)
			if (ldev->tracker.netdev_state[i].tx_enabled)
				ndev = ldev->pf[i].netdev;
		if (!ndev)
			ndev = ldev->pf[ldev->ports - 1].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	u8 port = 0;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == slave) {
			port = i;
			break;
		}
	}

	port = ldev->v2p_map[port * ldev->buckets];

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return 0;

	return ldev->ports;
}
EXPORT_SYMBOL(mlx5_lag_get_num_ports);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
			   ldev->pf[MLX5_LAG_P2].dev :
			   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = ldev->ports;
		for (i = 0; i < ldev->ports; i++)
			mdev[i] = ldev->pf[i].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);