// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <net/dst_metadata.h>
#include <linux/netdevice.h>
#include <linux/if_macvlan.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include "tc.h"
#include "neigh.h"
#include "en_rep.h"
#include "eswitch.h"
#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mapping.h"
#include "en/tc_tun.h"
#include "lib/port_tun.h"
#include "en/tc/sample.h"
#include "en_accel/ipsec_rxtx.h"
#include "en/tc/int_port.h"
#include "en/tc/act/act.h"

struct mlx5e_rep_indr_block_priv {
	struct net_device *netdev;
	struct mlx5e_rep_priv *rpriv;
	enum flow_block_binder_type binder_type;

	struct list_head list;
};

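/* Take a reference on the tunnel-type entropy and link the encap entry to the
 * neighbour hash entry matching @m_neigh, creating the hash entry if it does
 * not exist yet. On failure the entropy refcount is released again.
 */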
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct mlx5e_neigh *m_neigh,
				 struct net_device *neigh_dev)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
	struct mlx5e_neigh_hash_entry *nhe;
	int err;

	err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
	if (err)
		return err;

	mutex_lock(&rpriv->neigh_update.encap_lock);
	nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
	if (!nhe) {
		err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
		if (err) {
			mutex_unlock(&rpriv->neigh_update.encap_lock);
			mlx5_tun_entropy_refcount_dec(tun_entropy,
						      e->reformat_type);
			return err;
		}
	}

	e->nhe = nhe;
	spin_lock(&nhe->encap_list_lock);
	list_add_rcu(&e->encap_list, &nhe->encap_list);
	spin_unlock(&nhe->encap_list_lock);

	mutex_unlock(&rpriv->neigh_update.encap_lock);

	return 0;
}

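/* Reverse of mlx5e_rep_encap_entry_attach(): unlink the encap entry from its
 * neighbour hash entry, drop the hash entry reference and release the
 * tunnel-type entropy refcount.
 */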
void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
				  struct mlx5e_encap_entry *e)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;

	if (!e->nhe)
		return;

	spin_lock(&e->nhe->encap_list_lock);
	list_del_rcu(&e->encap_list);
	spin_unlock(&e->nhe->encap_list_lock);

	mlx5e_rep_neigh_entry_release(e->nhe);
	e->nhe = NULL;
	mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
}

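/* Update offloaded encap flows for a neighbour state change: tear the flows
 * down when the neighbour became invalid or its MAC changed, and (re)offload
 * them once the neighbour is connected, refreshing the cached encap header
 * addresses.
 */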
void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
			    struct mlx5e_encap_entry *e,
			    bool neigh_connected,
			    unsigned char ha[ETH_ALEN])
{
	struct ethhdr *eth = (struct ethhdr *)e->encap_header;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	bool encap_connected;
	LIST_HEAD(flow_list);

	ASSERT_RTNL();

	mutex_lock(&esw->offloads.encap_tbl_lock);
	encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
	if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
		goto unlock;

	mlx5e_take_all_encap_flows(e, &flow_list);

	if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
	    (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
		mlx5e_tc_encap_flows_del(priv, e, &flow_list);

	if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
		struct net_device *route_dev;

		ether_addr_copy(e->h_dest, ha);
		ether_addr_copy(eth->h_dest, ha);
		/* Also update the encap source mac, in case the flows were
		 * deleted because the encap source mac changed.
		 */
		route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex);
		if (route_dev)
			ether_addr_copy(eth->h_source, route_dev->dev_addr);

		mlx5e_tc_encap_flows_add(priv, e, &flow_list);
	}
unlock:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	mlx5e_put_flow_list(priv, &flow_list);
}

static int
mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
			      struct flow_cls_offload *cls_flower, int flags)
{
	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
					      flags);
	case FLOW_CLS_DESTROY:
		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
					   flags);
	case FLOW_CLS_STATS:
		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
					  flags);
	default:
		return -EOPNOTSUPP;
	}
}

static
int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
				    struct tc_cls_matchall_offload *ma)
{
	switch (ma->command) {
	case TC_CLSMATCHALL_REPLACE:
		return mlx5e_tc_configure_matchall(priv, ma);
	case TC_CLSMATCHALL_DESTROY:
		return mlx5e_tc_delete_matchall(priv, ma);
	case TC_CLSMATCHALL_STATS:
		mlx5e_tc_stats_matchall(priv, ma);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}

static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
	struct mlx5e_priv *priv = cb_priv;

	if (!priv->netdev || !netif_device_present(priv->netdev))
		return -EOPNOTSUPP;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
	case TC_SETUP_CLSMATCHALL:
		return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}

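/* Block callback for TC_SETUP_FT on the representor: FT rules are mapped onto
 * the reserved ft chain with a normalized prio so they can reuse the regular
 * tc flower offload path.
 */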
static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	struct flow_cls_offload tmp, *f = type_data;
	struct mlx5e_priv *priv = cb_priv;
	struct mlx5_eswitch *esw;
	unsigned long flags;
	int err;

	flags = MLX5_TC_FLAG(INGRESS) |
		MLX5_TC_FLAG(ESW_OFFLOAD) |
		MLX5_TC_FLAG(FT_OFFLOAD);
	esw = priv->mdev->priv.eswitch;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		memcpy(&tmp, f, sizeof(*f));

		if (!mlx5_chains_prios_supported(esw_chains(esw)))
			return -EOPNOTSUPP;

		/* Re-use the tc offload path by moving the ft flow to the
		 * reserved ft chain.
		 *
		 * FT offload can use prio range [0, INT_MAX], so we normalize
		 * it to the range [1, mlx5_chains_get_prio_range()]
		 * as with tc, where prio 0 isn't supported.
		 *
		 * We only support chain 0 of FT offload.
		 */
		if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)))
			return -EOPNOTSUPP;
		if (tmp.common.chain_index != 0)
			return -EOPNOTSUPP;

		tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
		tmp.common.prio++;
		err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
		memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
		return err;
	default:
		return -EOPNOTSUPP;
	}
}

static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
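/* tc setup entry point for the representor netdev: registers the tc and ft
 * block callbacks through flow_block_cb_setup_simple().
 */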
int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
		       void *type_data)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct flow_block_offload *f = type_data;

	f->unlocked_driver_cb = true;

	switch (type) {
	case TC_SETUP_BLOCK:
		return flow_block_cb_setup_simple(type_data,
						  &mlx5e_rep_block_tc_cb_list,
						  mlx5e_rep_setup_tc_cb,
						  priv, priv, true);
	case TC_SETUP_FT:
		return flow_block_cb_setup_simple(type_data,
						  &mlx5e_rep_block_ft_cb_list,
						  mlx5e_rep_setup_ft_cb,
						  priv, priv, true);
	default:
		return -EOPNOTSUPP;
	}
}

int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	int err;

	mutex_init(&uplink_priv->unready_flows_lock);
	INIT_LIST_HEAD(&uplink_priv->unready_flows);

	/* init shared tc flow table */
	err = mlx5e_tc_esw_init(uplink_priv);
	return err;
}

void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
{
	/* delete shared tc flow table */
	mlx5e_tc_esw_cleanup(&rpriv->uplink_priv);
	mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
}

void mlx5e_rep_tc_enable(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
		  mlx5e_tc_reoffload_flows_work);
}

void mlx5e_rep_tc_disable(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
}

int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);

	return NOTIFY_OK;
}

static struct mlx5e_rep_indr_block_priv *
mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
				 struct net_device *netdev,
				 enum flow_block_binder_type binder_type)
{
	struct mlx5e_rep_indr_block_priv *cb_priv;

	list_for_each_entry(cb_priv,
			    &rpriv->uplink_priv.tc_indr_block_priv_list,
			    list)
		if (cb_priv->netdev == netdev &&
		    cb_priv->binder_type == binder_type)
			return cb_priv;

	return NULL;
}

static int
mlx5e_rep_indr_offload(struct net_device *netdev,
		       struct flow_cls_offload *flower,
		       struct mlx5e_rep_indr_block_priv *indr_priv,
		       unsigned long flags)
{
	struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
	int err = 0;

	if (!netif_device_present(indr_priv->rpriv->netdev))
		return -EOPNOTSUPP;

	switch (flower->command) {
	case FLOW_CLS_REPLACE:
		err = mlx5e_configure_flower(netdev, priv, flower, flags);
		break;
	case FLOW_CLS_DESTROY:
		err = mlx5e_delete_flower(netdev, priv, flower, flags);
		break;
	case FLOW_CLS_STATS:
		err = mlx5e_stats_flower(netdev, priv, flower, flags);
		break;
	default:
		err = -EOPNOTSUPP;
	}

	return err;
}

static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
				      void *type_data, void *indr_priv)
{
	unsigned long flags = MLX5_TC_FLAG(ESW_OFFLOAD);
	struct mlx5e_rep_indr_block_priv *priv = indr_priv;

	flags |= (priv->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) ?
		MLX5_TC_FLAG(EGRESS) :
		MLX5_TC_FLAG(INGRESS);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
					      flags);
	default:
		return -EOPNOTSUPP;
	}
}

static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
				      void *type_data, void *indr_priv)
{
	struct mlx5e_rep_indr_block_priv *priv = indr_priv;
	struct flow_cls_offload *f = type_data;
	struct flow_cls_offload tmp;
	struct mlx5e_priv *mpriv;
	struct mlx5_eswitch *esw;
	unsigned long flags;
	int err;

	mpriv = netdev_priv(priv->rpriv->netdev);
	esw = mpriv->mdev->priv.eswitch;

	flags = MLX5_TC_FLAG(EGRESS) |
		MLX5_TC_FLAG(ESW_OFFLOAD) |
		MLX5_TC_FLAG(FT_OFFLOAD);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		memcpy(&tmp, f, sizeof(*f));

		/* Re-use the tc offload path by moving the ft flow to the
		 * reserved ft chain.
		 *
		 * FT offload can use prio range [0, INT_MAX], so we normalize
		 * it to the range [1, mlx5_chains_get_prio_range()]
		 * as with tc, where prio 0 isn't supported.
		 *
		 * We only support chain 0 of FT offload.
		 */
		if (!mlx5_chains_prios_supported(esw_chains(esw)) ||
		    tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) ||
		    tmp.common.chain_index)
			return -EOPNOTSUPP;

		tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
		tmp.common.prio++;
		err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
		memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
		return err;
	default:
		return -EOPNOTSUPP;
	}
}

static void mlx5e_rep_indr_block_unbind(void *cb_priv)
{
	struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;

	list_del(&indr_priv->list);
	kfree(indr_priv);
}

static LIST_HEAD(mlx5e_block_cb_list);

static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev)
{
	struct macvlan_dev *macvlan = netdev_priv(dev);

	return macvlan->mode == MACVLAN_MODE_PASSTHRU;
}

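/* Bind/unbind an indirect flow block on a tunnel, vlan, macvlan (passthru) or
 * OVS internal port device stacked on top of this uplink representor.
 */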
static int
mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
			   struct mlx5e_rep_priv *rpriv,
			   struct flow_block_offload *f,
			   flow_setup_cb_t *setup_cb,
			   void *data,
			   void (*cleanup)(struct flow_block_cb *block_cb))
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	bool is_ovs_int_port = netif_is_ovs_master(netdev);
	struct mlx5e_rep_indr_block_priv *indr_priv;
	struct flow_block_cb *block_cb;

	if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
	    !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) &&
	    !is_ovs_int_port) {
		if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev))
			return -EOPNOTSUPP;
		if (!mlx5e_rep_macvlan_mode_supported(netdev)) {
			netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode");
			return -EOPNOTSUPP;
		}
	}

	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
	    f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
		return -EOPNOTSUPP;

	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port)
		return -EOPNOTSUPP;

	if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw))
		return -EOPNOTSUPP;

	f->unlocked_driver_cb = true;
	f->driver_block_list = &mlx5e_block_cb_list;

	switch (f->command) {
	case FLOW_BLOCK_BIND:
		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
		if (indr_priv)
			return -EEXIST;

		indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
		if (!indr_priv)
			return -ENOMEM;

		indr_priv->netdev = netdev;
		indr_priv->rpriv = rpriv;
		indr_priv->binder_type = f->binder_type;
		list_add(&indr_priv->list,
			 &rpriv->uplink_priv.tc_indr_block_priv_list);

		block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv,
						    mlx5e_rep_indr_block_unbind,
						    f, netdev, sch, data, rpriv,
						    cleanup);
		if (IS_ERR(block_cb)) {
			list_del(&indr_priv->list);
			kfree(indr_priv);
			return PTR_ERR(block_cb);
		}
		flow_block_cb_add(block_cb, f);
		list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);

		return 0;
	case FLOW_BLOCK_UNBIND:
		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
		if (!indr_priv)
			return -ENOENT;

		block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
		if (!block_cb)
			return -ENOENT;

		flow_indr_block_cb_remove(block_cb, f);
		list_del(&block_cb->driver_list);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
	return 0;
}

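/* Offload a standalone tc action (flow_offload_action) through the matching
 * mlx5e_tc_act handler; only a single action per request is supported.
 */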
static int
mlx5e_rep_indr_replace_act(struct mlx5e_rep_priv *rpriv,
			   struct flow_offload_action *fl_act)

{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	enum mlx5_flow_namespace_type ns_type;
	struct flow_action_entry *action;
	struct mlx5e_tc_act *act;
	bool add = false;
	int i;

	/* There is currently no use case for more than one action (e.g. pedit).
	 * When there is, we will need to handle cleaning up multiple actions on
	 * error.
	 */
	if (!flow_offload_has_one_action(&fl_act->action))
		return -EOPNOTSUPP;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		ns_type = MLX5_FLOW_NAMESPACE_FDB;
	else
		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;

	flow_action_for_each(i, action, &fl_act->action) {
		act = mlx5e_tc_act_get(action->id, ns_type);
		if (!act)
			continue;

		if (!act->offload_action)
			continue;

		if (!act->offload_action(priv, fl_act, action))
			add = true;
	}

	return add ? 0 : -EOPNOTSUPP;
}

static int
mlx5e_rep_indr_destroy_act(struct mlx5e_rep_priv *rpriv,
			   struct flow_offload_action *fl_act)
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5e_tc_act *act;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		ns_type = MLX5_FLOW_NAMESPACE_FDB;
	else
		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;

	act = mlx5e_tc_act_get(fl_act->id, ns_type);
	if (!act || !act->destroy_action)
		return -EOPNOTSUPP;

	return act->destroy_action(priv, fl_act);
}

static int
mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv,
			 struct flow_offload_action *fl_act)

{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5e_tc_act *act;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		ns_type = MLX5_FLOW_NAMESPACE_FDB;
	else
		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;

	act = mlx5e_tc_act_get(fl_act->id, ns_type);
	if (!act || !act->stats_action)
		return -EOPNOTSUPP;

	return act->stats_action(priv, fl_act);
}

static int
mlx5e_rep_indr_setup_act(struct mlx5e_rep_priv *rpriv,
			 struct flow_offload_action *fl_act)
{
	switch (fl_act->command) {
	case FLOW_ACT_REPLACE:
		return mlx5e_rep_indr_replace_act(rpriv, fl_act);
	case FLOW_ACT_DESTROY:
		return mlx5e_rep_indr_destroy_act(rpriv, fl_act);
	case FLOW_ACT_STATS:
		return mlx5e_rep_indr_stats_act(rpriv, fl_act);
	default:
		return -EOPNOTSUPP;
	}
}

static int
mlx5e_rep_indr_no_dev_setup(struct mlx5e_rep_priv *rpriv,
			    enum tc_setup_type type,
			    void *data)
{
	if (!data)
		return -EOPNOTSUPP;

	switch (type) {
	case TC_SETUP_ACT:
		return mlx5e_rep_indr_setup_act(rpriv, data);
	default:
		return -EOPNOTSUPP;
	}
}

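/* Indirect setup callback registered with flow_indr_dev_register(): dispatches
 * tc/ft block setup for other devices, and action offload when no netdev is
 * given.
 */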
static
int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
			    enum tc_setup_type type, void *type_data,
			    void *data,
			    void (*cleanup)(struct flow_block_cb *block_cb))
{
	if (!netdev)
		return mlx5e_rep_indr_no_dev_setup(cb_priv, type, data);

	switch (type) {
	case TC_SETUP_BLOCK:
		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
						  mlx5e_rep_indr_setup_tc_cb,
						  data, cleanup);
	case TC_SETUP_FT:
		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
						  mlx5e_rep_indr_setup_ft_cb,
						  data, cleanup);
	default:
		return -EOPNOTSUPP;
	}
}

int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;

	/* init indirect block notifications */
	INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);

	return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv);
}

void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
{
	flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv,
				 mlx5e_rep_indr_block_unbind);
}

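/* Rebuild tunnel metadata for a packet that missed in hardware: look up the
 * tunnel match key and encap options by the ids carried in @tunnel_id, attach
 * a metadata dst to the skb and redirect it to the tunnel device.
 */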
static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
				 struct mlx5e_tc_update_priv *tc_priv,
				 u32 tunnel_id)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct tunnel_match_enc_opts enc_opts = {};
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct metadata_dst *tun_dst;
	struct tunnel_match_key key;
	u32 tun_id, enc_opts_id;
	struct net_device *dev;
	int err;

	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
	tun_id = tunnel_id >> ENC_OPTS_BITS;

	if (!tun_id)
		return true;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
	if (err) {
		WARN_ON_ONCE(true);
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel for tun_id: %d, err: %d\n",
			   tun_id, err);
		return false;
	}

	if (enc_opts_id) {
		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
				   enc_opts_id, &enc_opts);
		if (err) {
			netdev_dbg(priv->netdev,
				   "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
				   enc_opts_id, err);
			return false;
		}
	}

	if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
					   key.enc_ip.tos, key.enc_ip.ttl,
					   key.enc_tp.dst, TUNNEL_KEY,
					   key32_to_tunnel_id(key.enc_key_id.keyid),
					   enc_opts.key.len);
	} else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
					     key.enc_ip.tos, key.enc_ip.ttl,
					     key.enc_tp.dst, 0, TUNNEL_KEY,
					     key32_to_tunnel_id(key.enc_key_id.keyid),
					     enc_opts.key.len);
	} else {
		netdev_dbg(priv->netdev,
			   "Couldn't restore tunnel, unsupported addr_type: %d\n",
			   key.enc_control.addr_type);
		return false;
	}

	if (!tun_dst) {
		netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
		return false;
	}

	tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;

	if (enc_opts.key.len)
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					enc_opts.key.data,
					enc_opts.key.len,
					enc_opts.key.dst_opt_type);

	skb_dst_set(skb, (struct dst_entry *)tun_dst);
	dev = dev_get_by_index(&init_net, key.filter_ifindex);
	if (!dev) {
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel device with ifindex: %d\n",
			   key.filter_ifindex);
		return false;
	}

	/* Set fwd_dev so we do dev_put() after datapath */
	tc_priv->fwd_dev = dev;

	skb->dev = dev;

	return true;
}

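/* Restore the tc chain (via a tc_skb_ext), conntrack state and tunnel info
 * encoded in the chain mapping and reg_c1 so software tc can resume processing
 * of the packet.
 */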
static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
				    struct mlx5e_tc_update_priv *tc_priv)
{
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;

#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	if (chain) {
		struct mlx5_rep_uplink_priv *uplink_priv;
		struct mlx5e_rep_priv *uplink_rpriv;
		struct tc_skb_ext *tc_skb_ext;
		struct mlx5_eswitch *esw;
		u32 zone_restore_id;

		tc_skb_ext = tc_skb_ext_alloc(skb);
		if (!tc_skb_ext) {
			WARN_ON(1);
			return false;
		}
		tc_skb_ext->chain = chain;
		zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
		esw = priv->mdev->priv.eswitch;
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;
		if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
					      zone_restore_id))
			return false;
	}
#endif /* CONFIG_NET_TC_SKB_EXT */

	return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
}

static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
{
	if (tc_priv->fwd_dev)
		dev_put(tc_priv->fwd_dev);
}

static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
				     struct mlx5_mapped_obj *mapped_obj,
				     struct mlx5e_tc_update_priv *tc_priv)
{
	if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
		netdev_dbg(priv->netdev,
			   "Failed to restore tunnel info for sampled packet\n");
		return;
	}
	mlx5e_tc_sample_skb(skb, mapped_obj);
	mlx5_rep_tc_post_napi_receive(tc_priv);
}

static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
				       struct mlx5_mapped_obj *mapped_obj,
				       struct mlx5e_tc_update_priv *tc_priv,
				       bool *forward_tx,
				       u32 reg_c1)
{
	u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	/* Tunnel restore takes precedence over int port restore */
	if (tunnel_id)
		return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
				      mapped_obj->int_port_metadata, forward_tx)) {
		/* Set fwd_dev for future dev_put */
		tc_priv->fwd_dev = skb->dev;

		return true;
	}

	return false;
}

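/* RX handler for representor traffic carrying mapped metadata in the CQE:
 * restore chain/sample/int-port state and either pass the skb up the stack,
 * transmit it, or drop it if restoration fails.
 */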
void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
			  struct sk_buff *skb)
{
	u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
	struct mlx5e_tc_update_priv tc_priv = {};
	struct mlx5_mapped_obj mapped_obj;
	struct mlx5_eswitch *esw;
	bool forward_tx = false;
	struct mlx5e_priv *priv;
	u32 reg_c0;
	int err;

	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
	if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
		goto forward;

	/* If reg_c0 is not equal to the default flow tag then skb->mark
	 * is not supported and must be reset back to 0.
	 */
	skb->mark = 0;

	priv = netdev_priv(skb->dev);
	esw = priv->mdev->priv.eswitch;
	err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find mapped object for reg_c0: %d, err: %d\n",
			   reg_c0, err);
		goto free_skb;
	}

	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
		if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) &&
		    !mlx5_ipsec_is_rx_flow(cqe))
			goto free_skb;
	} else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
		mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv);
		goto free_skb;
	} else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) {
		if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv,
						&forward_tx, reg_c1))
			goto free_skb;
	} else {
		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
		goto free_skb;
	}

forward:
	if (forward_tx)
		dev_queue_xmit(skb);
	else
		napi_gro_receive(rq->cq.napi, skb);

	mlx5_rep_tc_post_napi_receive(&tc_priv);

	return;

free_skb:
	dev_kfree_skb_any(skb);
}