1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3 
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include "tc_tun_encap.h"
7 #include "en_tc.h"
8 #include "tc_tun.h"
9 #include "rep/tc.h"
10 #include "diag/en_tc_tracepoint.h"
11 
12 enum {
13 	MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
14 };
15 
16 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
17 				     struct mlx5_flow_attr *attr,
18 				     struct mlx5e_encap_entry *e,
19 				     int out_index)
20 {
21 	struct net_device *route_dev;
22 	int err = 0;
23 
24 	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
25 
26 	if (!route_dev || !netif_is_ovs_master(route_dev))
27 		goto out;
28 
29 	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
30 						MLX5E_TC_INT_PORT_EGRESS,
31 						&attr->action, out_index);
32 
33 out:
34 	if (route_dev)
35 		dev_put(route_dev);
36 
37 	return err;
38 }
39 
40 struct mlx5e_route_key {
41 	int ip_version;
42 	union {
43 		__be32 v4;
44 		struct in6_addr v6;
45 	} endpoint_ip;
46 };
47 
48 struct mlx5e_route_entry {
49 	struct mlx5e_route_key key;
50 	struct list_head encap_entries;
51 	struct list_head decap_flows;
52 	u32 flags;
53 	struct hlist_node hlist;
54 	refcount_t refcnt;
55 	int tunnel_dev_index;
56 	struct rcu_head rcu;
57 };
58 
59 struct mlx5e_tc_tun_encap {
60 	struct mlx5e_priv *priv;
61 	struct notifier_block fib_nb;
62 	spinlock_t route_lock; /* protects route_tbl */
63 	unsigned long route_tbl_last_update;
64 	DECLARE_HASHTABLE(route_tbl, 8);
65 };
66 
67 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
68 {
69 	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
70 }
71 
72 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
73 			     struct mlx5_flow_spec *spec)
74 {
75 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
76 	struct mlx5_rx_tun_attr *tun_attr;
77 	void *daddr, *saddr;
78 	u8 ip_version;
79 
80 	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
81 	if (!tun_attr)
82 		return -ENOMEM;
83 
84 	esw_attr->rx_tun_attr = tun_attr;
85 	ip_version = mlx5e_tc_get_ip_version(spec, true);
86 
87 	if (ip_version == 4) {
88 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
89 				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
90 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
91 				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
92 		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
93 		tun_attr->src_ip.v4 = *(__be32 *)saddr;
94 		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
95 			return 0;
96 	}
97 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
98 	else if (ip_version == 6) {
99 		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
100 		struct in6_addr zerov6 = {};
101 
102 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
103 				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
104 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
105 				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
106 		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
107 		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
108 		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
109 		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
110 			return 0;
111 	}
112 #endif
113 	/* Only set the flag if both src and dst ip addresses exist. They are
114 	 * required to establish routing.
115 	 */
116 	flow_flag_set(flow, TUN_RX);
117 	flow->attr->tun_ip_version = ip_version;
118 	return 0;
119 }
120 
121 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
122 {
123 	bool all_flow_encaps_valid = true;
124 	int i;
125 
126 	/* Flow can be associated with multiple encap entries.
127 	 * Before offloading the flow verify that all of them have
128 	 * a valid neighbour.
129 	 */
130 	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
131 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
132 			continue;
133 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
134 			all_flow_encaps_valid = false;
135 			break;
136 		}
137 	}
138 
139 	return all_flow_encaps_valid;
140 }
141 
142 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
143 			      struct mlx5e_encap_entry *e,
144 			      struct list_head *flow_list)
145 {
146 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
147 	struct mlx5_pkt_reformat_params reformat_params;
148 	struct mlx5_esw_flow_attr *esw_attr;
149 	struct mlx5_flow_handle *rule;
150 	struct mlx5_flow_attr *attr;
151 	struct mlx5_flow_spec *spec;
152 	struct mlx5e_tc_flow *flow;
153 	int err;
154 
155 	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
156 		return;
157 
158 	memset(&reformat_params, 0, sizeof(reformat_params));
159 	reformat_params.type = e->reformat_type;
160 	reformat_params.size = e->encap_size;
161 	reformat_params.data = e->encap_header;
162 	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
163 						     &reformat_params,
164 						     MLX5_FLOW_NAMESPACE_FDB);
165 	if (IS_ERR(e->pkt_reformat)) {
166 		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
167 			       PTR_ERR(e->pkt_reformat));
168 		return;
169 	}
170 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
171 	mlx5e_rep_queue_neigh_stats_work(priv);
172 
173 	list_for_each_entry(flow, flow_list, tmp_list) {
174 		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
175 			continue;
176 
177 		spec = &flow->attr->parse_attr->spec;
178 
179 		attr = mlx5e_tc_get_encap_attr(flow);
180 		esw_attr = attr->esw_attr;
181 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
182 		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
183 
184 		/* Do not offload flows with unresolved neighbors */
185 		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
186 			continue;
187 
188 		err = mlx5e_tc_offload_flow_post_acts(flow);
189 		if (err) {
190 			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
191 				       err);
192 			continue;
193 		}
194 
195 		/* update from slow path rule to encap rule */
196 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
197 		if (IS_ERR(rule)) {
198 			mlx5e_tc_unoffload_flow_post_acts(flow);
199 			err = PTR_ERR(rule);
200 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
201 				       err);
202 			continue;
203 		}
204 
205 		mlx5e_tc_unoffload_from_slow_path(esw, flow);
206 		flow->rule[0] = rule;
207 		/* was unset when slow path rule removed */
208 		flow_flag_set(flow, OFFLOADED);
209 	}
210 }
211 
212 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
213 			      struct mlx5e_encap_entry *e,
214 			      struct list_head *flow_list)
215 {
216 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
217 	struct mlx5_esw_flow_attr *esw_attr;
218 	struct mlx5_flow_handle *rule;
219 	struct mlx5_flow_attr *attr;
220 	struct mlx5_flow_spec *spec;
221 	struct mlx5e_tc_flow *flow;
222 	int err;
223 
224 	list_for_each_entry(flow, flow_list, tmp_list) {
225 		if (!mlx5e_is_offloaded_flow(flow))
226 			continue;
227 
228 		attr = mlx5e_tc_get_encap_attr(flow);
229 		esw_attr = attr->esw_attr;
230 		/* mark the flow's encap dest as non-valid */
231 		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
232 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
233 
234 		/* Clear pkt_reformat before checking the slow path flag: a flow
235 		 * that already has the slow path flag set still needs its
236 		 * pkt_reformat cleared.
237 		 */
238 		if (flow_flag_test(flow, SLOW))
239 			continue;
240 
241 		/* update from encap rule to slow path rule */
242 		spec = &flow->attr->parse_attr->spec;
243 		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
244 
245 		if (IS_ERR(rule)) {
246 			err = PTR_ERR(rule);
247 			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
248 				       err);
249 			continue;
250 		}
251 
252 		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
253 		mlx5e_tc_unoffload_flow_post_acts(flow);
254 		flow->rule[0] = rule;
255 		/* was unset when fast path rule removed */
256 		flow_flag_set(flow, OFFLOADED);
257 	}
258 
259 	/* we know that the encap is valid */
260 	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
261 	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
262 	e->pkt_reformat = NULL;
263 }
264 
265 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
266 				struct list_head *flow_list,
267 				int index)
268 {
269 	if (IS_ERR(mlx5e_flow_get(flow))) {
270 		/* Flow is being deleted concurrently. Wait for it to be
271 		 * unoffloaded from hardware, otherwise deleting encap will
272 		 * fail.
273 		 */
274 		wait_for_completion(&flow->del_hw_done);
275 		return;
276 	}
277 	wait_for_completion(&flow->init_done);
278 
279 	flow->tmp_entry_index = index;
280 	list_add(&flow->tmp_list, flow_list);
281 }
282 
283 /* Takes reference to all flows attached to encap and adds the flows to
284  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
285  */
286 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
287 {
288 	struct encap_flow_item *efi;
289 	struct mlx5e_tc_flow *flow;
290 
291 	list_for_each_entry(efi, &e->flows, list) {
292 		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
293 		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
294 	}
295 }
296 
297 /* Takes reference to all flows attached to route and adds the flows to
298  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
299  */
300 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
301 					     struct list_head *flow_list)
302 {
303 	struct mlx5e_tc_flow *flow;
304 
305 	list_for_each_entry(flow, &r->decap_flows, decap_routes)
306 		mlx5e_take_tmp_flow(flow, flow_list, 0);
307 }
308 
309 typedef bool (match_cb)(struct mlx5e_encap_entry *);
310 
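/* Walk nhe->encap_list under RCU and return the next encap entry accepted by
 * the match callback, with a reference taken on it. The reference on the
 * starting entry 'e', if any, is released. The candidate's res_ready
 * completion is waited on before matching, so callers only see fully
 * initialized entries.
 */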
311 static struct mlx5e_encap_entry *
312 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
313 			      struct mlx5e_encap_entry *e,
314 			      match_cb match)
315 {
316 	struct mlx5e_encap_entry *next = NULL;
317 
318 retry:
319 	rcu_read_lock();
320 
321 	/* find encap with non-zero reference counter value */
322 	for (next = e ?
323 		     list_next_or_null_rcu(&nhe->encap_list,
324 					   &e->encap_list,
325 					   struct mlx5e_encap_entry,
326 					   encap_list) :
327 		     list_first_or_null_rcu(&nhe->encap_list,
328 					    struct mlx5e_encap_entry,
329 					    encap_list);
330 	     next;
331 	     next = list_next_or_null_rcu(&nhe->encap_list,
332 					  &next->encap_list,
333 					  struct mlx5e_encap_entry,
334 					  encap_list))
335 		if (mlx5e_encap_take(next))
336 			break;
337 
338 	rcu_read_unlock();
339 
340 	/* release starting encap */
341 	if (e)
342 		mlx5e_encap_put(netdev_priv(e->out_dev), e);
343 	if (!next)
344 		return next;
345 
346 	/* wait for encap to be fully initialized */
347 	wait_for_completion(&next->res_ready);
348 	/* continue searching if encap entry is not in valid state after completion */
349 	if (!match(next)) {
350 		e = next;
351 		goto retry;
352 	}
353 
354 	return next;
355 }
356 
357 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
358 {
359 	return e->flags & MLX5_ENCAP_ENTRY_VALID;
360 }
361 
362 static struct mlx5e_encap_entry *
363 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
364 			   struct mlx5e_encap_entry *e)
365 {
366 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
367 }
368 
369 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
370 {
371 	return e->compl_result >= 0;
372 }
373 
374 struct mlx5e_encap_entry *
375 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
376 			  struct mlx5e_encap_entry *e)
377 {
378 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
379 }
380 
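/* Check whether any offloaded flow that encapsulates towards this neighbour
 * was used since nhe->reported_lastuse by querying its hardware counter. If
 * so, update reported_lastuse and send a neigh event so the stack keeps the
 * neighbour entry alive.
 */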
381 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
382 {
383 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
384 	struct mlx5e_encap_entry *e = NULL;
385 	struct mlx5e_tc_flow *flow;
386 	struct mlx5_fc *counter;
387 	struct neigh_table *tbl;
388 	bool neigh_used = false;
389 	struct neighbour *n;
390 	u64 lastuse;
391 
392 	if (m_neigh->family == AF_INET)
393 		tbl = &arp_tbl;
394 #if IS_ENABLED(CONFIG_IPV6)
395 	else if (m_neigh->family == AF_INET6)
396 		tbl = ipv6_stub->nd_tbl;
397 #endif
398 	else
399 		return;
400 
401 	/* mlx5e_get_next_valid_encap() releases previous encap before returning
402 	 * next one.
403 	 */
404 	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
405 		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
406 		struct encap_flow_item *efi, *tmp;
407 		struct mlx5_eswitch *esw;
408 		LIST_HEAD(flow_list);
409 
410 		esw = priv->mdev->priv.eswitch;
411 		mutex_lock(&esw->offloads.encap_tbl_lock);
412 		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
413 			flow = container_of(efi, struct mlx5e_tc_flow,
414 					    encaps[efi->index]);
415 			if (IS_ERR(mlx5e_flow_get(flow)))
416 				continue;
417 			list_add(&flow->tmp_list, &flow_list);
418 
419 			if (mlx5e_is_offloaded_flow(flow)) {
420 				counter = mlx5e_tc_get_counter(flow);
421 				lastuse = mlx5_fc_query_lastuse(counter);
422 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
423 					neigh_used = true;
424 					break;
425 				}
426 			}
427 		}
428 		mutex_unlock(&esw->offloads.encap_tbl_lock);
429 
430 		mlx5e_put_flow_list(priv, &flow_list);
431 		if (neigh_used) {
432 			/* release current encap before breaking the loop */
433 			mlx5e_encap_put(priv, e);
434 			break;
435 		}
436 	}
437 
438 	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
439 
440 	if (neigh_used) {
441 		nhe->reported_lastuse = jiffies;
442 
443 		/* find the relevant neigh according to the cached device and
444 		 * dst ip pair
445 		 */
446 		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
447 		if (!n)
448 			return;
449 
450 		neigh_event_send(n, NULL);
451 		neigh_release(n);
452 	}
453 }
454 
455 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
456 {
457 	WARN_ON(!list_empty(&e->flows));
458 
459 	if (e->compl_result > 0) {
460 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
461 
462 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
463 			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
464 	}
465 
466 	kfree(e->tun_info);
467 	kfree(e->encap_header);
468 	kfree_rcu(e, rcu);
469 }
470 
471 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
472 				struct mlx5e_decap_entry *d)
473 {
474 	WARN_ON(!list_empty(&d->flows));
475 
476 	if (!d->compl_result)
477 		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
478 
479 	kfree_rcu(d, rcu);
480 }
481 
482 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
483 {
484 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
485 
486 	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
487 		return;
488 	list_del(&e->route_list);
489 	hash_del_rcu(&e->encap_hlist);
490 	mutex_unlock(&esw->offloads.encap_tbl_lock);
491 
492 	mlx5e_encap_dealloc(priv, e);
493 }
494 
495 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
496 {
497 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
498 
499 	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
500 		return;
501 	hash_del_rcu(&d->hlist);
502 	mutex_unlock(&esw->offloads.decap_tbl_lock);
503 
504 	mlx5e_decap_dealloc(priv, d);
505 }
506 
507 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
508 				     struct mlx5e_tc_flow *flow,
509 				     int out_index);
510 
511 void mlx5e_detach_encap(struct mlx5e_priv *priv,
512 			struct mlx5e_tc_flow *flow,
513 			struct mlx5_flow_attr *attr,
514 			int out_index)
515 {
516 	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
517 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
518 
519 	if (!mlx5e_is_eswitch_flow(flow))
520 		return;
521 
522 	if (attr->esw_attr->dests[out_index].flags &
523 	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
524 		mlx5e_detach_encap_route(priv, flow, out_index);
525 
526 	/* flow wasn't fully initialized */
527 	if (!e)
528 		return;
529 
530 	mutex_lock(&esw->offloads.encap_tbl_lock);
531 	list_del(&flow->encaps[out_index].list);
532 	flow->encaps[out_index].e = NULL;
533 	if (!refcount_dec_and_test(&e->refcnt)) {
534 		mutex_unlock(&esw->offloads.encap_tbl_lock);
535 		return;
536 	}
537 	list_del(&e->route_list);
538 	hash_del_rcu(&e->encap_hlist);
539 	mutex_unlock(&esw->offloads.encap_tbl_lock);
540 
541 	mlx5e_encap_dealloc(priv, e);
542 }
543 
544 void mlx5e_detach_decap(struct mlx5e_priv *priv,
545 			struct mlx5e_tc_flow *flow)
546 {
547 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
548 	struct mlx5e_decap_entry *d = flow->decap_reformat;
549 
550 	if (!d)
551 		return;
552 
553 	mutex_lock(&esw->offloads.decap_tbl_lock);
554 	list_del(&flow->l3_to_l2_reformat);
555 	flow->decap_reformat = NULL;
556 
557 	if (!refcount_dec_and_test(&d->refcnt)) {
558 		mutex_unlock(&esw->offloads.decap_tbl_lock);
559 		return;
560 	}
561 	hash_del_rcu(&d->hlist);
562 	mutex_unlock(&esw->offloads.decap_tbl_lock);
563 
564 	mlx5e_decap_dealloc(priv, d);
565 }
566 
567 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
568 					   struct mlx5e_encap_key *b)
569 {
570 	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
571 		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
572 }
573 
574 static int cmp_decap_info(struct mlx5e_decap_key *a,
575 			  struct mlx5e_decap_key *b)
576 {
577 	return memcmp(&a->key, &b->key, sizeof(b->key));
578 }
579 
580 static int hash_encap_info(struct mlx5e_encap_key *key)
581 {
582 	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
583 		     key->tc_tunnel->tunnel_type);
584 }
585 
586 static int hash_decap_info(struct mlx5e_decap_key *key)
587 {
588 	return jhash(&key->key, sizeof(key->key), 0);
589 }
590 
591 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
592 {
593 	return refcount_inc_not_zero(&e->refcnt);
594 }
595 
596 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
597 {
598 	return refcount_inc_not_zero(&e->refcnt);
599 }
600 
601 static struct mlx5e_encap_entry *
602 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
603 		uintptr_t hash_key)
604 {
605 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
606 	struct mlx5e_encap_key e_key;
607 	struct mlx5e_encap_entry *e;
608 
609 	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
610 				   encap_hlist, hash_key) {
611 		e_key.ip_tun_key = &e->tun_info->key;
612 		e_key.tc_tunnel = e->tunnel;
613 		if (e->tunnel->encap_info_equal(&e_key, key) &&
614 		    mlx5e_encap_take(e))
615 			return e;
616 	}
617 
618 	return NULL;
619 }
620 
621 static struct mlx5e_decap_entry *
622 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
623 		uintptr_t hash_key)
624 {
625 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
626 	struct mlx5e_decap_key r_key;
627 	struct mlx5e_decap_entry *e;
628 
629 	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
630 				   hlist, hash_key) {
631 		r_key = e->key;
632 		if (!cmp_decap_info(&r_key, key) &&
633 		    mlx5e_decap_take(e))
634 			return e;
635 	}
636 	return NULL;
637 }
638 
639 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
640 {
641 	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
642 
643 	return kmemdup(tun_info, tun_size, GFP_KERNEL);
644 }
645 
646 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
647 				      struct mlx5e_tc_flow *flow,
648 				      int out_index,
649 				      struct mlx5e_encap_entry *e,
650 				      struct netlink_ext_ack *extack)
651 {
652 	int i;
653 
654 	for (i = 0; i < out_index; i++) {
655 		if (flow->encaps[i].e != e)
656 			continue;
657 		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
658 		netdev_err(priv->netdev, "can't duplicate encap action\n");
659 		return true;
660 	}
661 
662 	return false;
663 }
664 
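/* If the tunnel route device is an mlx5 VF on the same hardware, prepare a
 * source port rewrite for this destination: clear dest_chain, add a modify
 * header action writing the VF's vport metadata to VPORT_TO_REG, and flag the
 * destination with MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE.
 */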
665 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
666 			       struct mlx5_flow_attr *attr,
667 			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
668 			       struct net_device *out_dev,
669 			       int route_dev_ifindex,
670 			       int out_index)
671 {
672 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
673 	struct net_device *route_dev;
674 	u16 vport_num;
675 	int err = 0;
676 	u32 data;
677 
678 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
679 
680 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
681 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
682 		goto out;
683 
684 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
685 	if (err)
686 		goto out;
687 
688 	attr->dest_chain = 0;
689 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
690 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
691 	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
692 						       vport_num);
693 	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
694 						   MLX5_FLOW_NAMESPACE_FDB,
695 						   VPORT_TO_REG, data);
696 	if (err >= 0) {
697 		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
698 		err = 0;
699 	}
700 
701 out:
702 	if (route_dev)
703 		dev_put(route_dev);
704 	return err;
705 }
706 
707 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
708 				  struct mlx5_esw_flow_attr *attr,
709 				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
710 				  struct net_device *out_dev,
711 				  int route_dev_ifindex,
712 				  int out_index)
713 {
714 	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
715 	struct net_device *route_dev;
716 	u16 vport_num;
717 	int err = 0;
718 	u32 data;
719 
720 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
721 
722 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
723 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
724 		err = -ENODEV;
725 		goto out;
726 	}
727 
728 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
729 	if (err)
730 		goto out;
731 
732 	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
733 						       vport_num);
734 	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
735 
736 out:
737 	if (route_dev)
738 		dev_put(route_dev);
739 	return err;
740 }
741 
742 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
743 {
744 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
745 	struct mlx5_rep_uplink_priv *uplink_priv;
746 	struct mlx5e_rep_priv *uplink_rpriv;
747 	struct mlx5e_tc_tun_encap *encap;
748 	unsigned int ret;
749 
750 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
751 	uplink_priv = &uplink_rpriv->uplink_priv;
752 	encap = uplink_priv->encap;
753 
754 	spin_lock_bh(&encap->route_lock);
755 	ret = encap->route_tbl_last_update;
756 	spin_unlock_bh(&encap->route_lock);
757 	return ret;
758 }
759 
760 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
761 				    struct mlx5e_tc_flow *flow,
762 				    struct mlx5_flow_attr *attr,
763 				    struct mlx5e_encap_entry *e,
764 				    bool new_encap_entry,
765 				    unsigned long tbl_time_before,
766 				    int out_index);
767 
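/* Look up or create the encap entry matching the tunnel info of action
 * 'out_index', build the encapsulation header for the tunnel address family,
 * attach the corresponding route entry, and point the flow's destination at
 * the resulting packet reformat. While the encap entry is not valid (the
 * neighbour is unresolved), the flow is marked SLOW and offloaded through the
 * slow path.
 */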
768 int mlx5e_attach_encap(struct mlx5e_priv *priv,
769 		       struct mlx5e_tc_flow *flow,
770 		       struct mlx5_flow_attr *attr,
771 		       struct net_device *mirred_dev,
772 		       int out_index,
773 		       struct netlink_ext_ack *extack,
774 		       struct net_device **encap_dev)
775 {
776 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
777 	struct mlx5e_tc_flow_parse_attr *parse_attr;
778 	const struct ip_tunnel_info *tun_info;
779 	const struct mlx5e_mpls_info *mpls_info;
780 	unsigned long tbl_time_before = 0;
781 	struct mlx5e_encap_entry *e;
782 	struct mlx5e_encap_key key;
783 	bool entry_created = false;
784 	unsigned short family;
785 	uintptr_t hash_key;
786 	int err = 0;
787 
788 	parse_attr = attr->parse_attr;
789 	tun_info = parse_attr->tun_info[out_index];
790 	mpls_info = &parse_attr->mpls_info[out_index];
791 	family = ip_tunnel_info_af(tun_info);
792 	key.ip_tun_key = &tun_info->key;
793 	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
794 	if (!key.tc_tunnel) {
795 		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
796 		return -EOPNOTSUPP;
797 	}
798 
799 	hash_key = hash_encap_info(&key);
800 
801 	mutex_lock(&esw->offloads.encap_tbl_lock);
802 	e = mlx5e_encap_get(priv, &key, hash_key);
803 
804 	/* must verify if encap is valid or not */
805 	if (e) {
806 		/* Check that entry was not already attached to this flow */
807 		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
808 			err = -EOPNOTSUPP;
809 			goto out_err;
810 		}
811 
812 		mutex_unlock(&esw->offloads.encap_tbl_lock);
813 		wait_for_completion(&e->res_ready);
814 
815 		/* Protect against concurrent neigh update. */
816 		mutex_lock(&esw->offloads.encap_tbl_lock);
817 		if (e->compl_result < 0) {
818 			err = -EREMOTEIO;
819 			goto out_err;
820 		}
821 		goto attach_flow;
822 	}
823 
824 	e = kzalloc(sizeof(*e), GFP_KERNEL);
825 	if (!e) {
826 		err = -ENOMEM;
827 		goto out_err;
828 	}
829 
830 	refcount_set(&e->refcnt, 1);
831 	init_completion(&e->res_ready);
832 	entry_created = true;
833 	INIT_LIST_HEAD(&e->route_list);
834 
835 	tun_info = mlx5e_dup_tun_info(tun_info);
836 	if (!tun_info) {
837 		err = -ENOMEM;
838 		goto out_err_init;
839 	}
840 	e->tun_info = tun_info;
841 	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
842 	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
843 	if (err)
844 		goto out_err_init;
845 
846 	INIT_LIST_HEAD(&e->flows);
847 	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
848 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
849 	mutex_unlock(&esw->offloads.encap_tbl_lock);
850 
851 	if (family == AF_INET)
852 		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
853 	else if (family == AF_INET6)
854 		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
855 
856 	/* Protect against concurrent neigh update. */
857 	mutex_lock(&esw->offloads.encap_tbl_lock);
858 	complete_all(&e->res_ready);
859 	if (err) {
860 		e->compl_result = err;
861 		goto out_err;
862 	}
863 	e->compl_result = 1;
864 
865 attach_flow:
866 	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
867 				       tbl_time_before, out_index);
868 	if (err)
869 		goto out_err;
870 
871 	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
872 	if (err == -EOPNOTSUPP) {
873 		/* If device doesn't support int port offload,
874 		 * redirect to uplink vport.
875 		 */
876 		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
877 		err = 0;
878 	} else if (err) {
879 		goto out_err;
880 	}
881 
882 	flow->encaps[out_index].e = e;
883 	list_add(&flow->encaps[out_index].list, &e->flows);
884 	flow->encaps[out_index].index = out_index;
885 	*encap_dev = e->out_dev;
886 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
887 		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
888 		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
889 	} else {
890 		flow_flag_set(flow, SLOW);
891 	}
892 	mutex_unlock(&esw->offloads.encap_tbl_lock);
893 
894 	return err;
895 
896 out_err:
897 	mutex_unlock(&esw->offloads.encap_tbl_lock);
898 	if (e)
899 		mlx5e_encap_put(priv, e);
900 	return err;
901 
902 out_err_init:
903 	mutex_unlock(&esw->offloads.encap_tbl_lock);
904 	kfree(tun_info);
905 	kfree(e);
906 	return err;
907 }
908 
909 int mlx5e_attach_decap(struct mlx5e_priv *priv,
910 		       struct mlx5e_tc_flow *flow,
911 		       struct netlink_ext_ack *extack)
912 {
913 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
914 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
915 	struct mlx5_pkt_reformat_params reformat_params;
916 	struct mlx5e_decap_entry *d;
917 	struct mlx5e_decap_key key;
918 	uintptr_t hash_key;
919 	int err = 0;
920 
921 	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
922 		NL_SET_ERR_MSG_MOD(extack,
923 				   "encap header larger than max supported");
924 		return -EOPNOTSUPP;
925 	}
926 
927 	key.key = attr->eth;
928 	hash_key = hash_decap_info(&key);
929 	mutex_lock(&esw->offloads.decap_tbl_lock);
930 	d = mlx5e_decap_get(priv, &key, hash_key);
931 	if (d) {
932 		mutex_unlock(&esw->offloads.decap_tbl_lock);
933 		wait_for_completion(&d->res_ready);
934 		mutex_lock(&esw->offloads.decap_tbl_lock);
935 		if (d->compl_result) {
936 			err = -EREMOTEIO;
937 			goto out_free;
938 		}
939 		goto found;
940 	}
941 
942 	d = kzalloc(sizeof(*d), GFP_KERNEL);
943 	if (!d) {
944 		err = -ENOMEM;
945 		goto out_err;
946 	}
947 
948 	d->key = key;
949 	refcount_set(&d->refcnt, 1);
950 	init_completion(&d->res_ready);
951 	INIT_LIST_HEAD(&d->flows);
952 	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
953 	mutex_unlock(&esw->offloads.decap_tbl_lock);
954 
955 	memset(&reformat_params, 0, sizeof(reformat_params));
956 	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
957 	reformat_params.size = sizeof(attr->eth);
958 	reformat_params.data = &attr->eth;
959 	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
960 						     &reformat_params,
961 						     MLX5_FLOW_NAMESPACE_FDB);
962 	if (IS_ERR(d->pkt_reformat)) {
963 		err = PTR_ERR(d->pkt_reformat);
964 		d->compl_result = err;
965 	}
966 	mutex_lock(&esw->offloads.decap_tbl_lock);
967 	complete_all(&d->res_ready);
968 	if (err)
969 		goto out_free;
970 
971 found:
972 	flow->decap_reformat = d;
973 	attr->decap_pkt_reformat = d->pkt_reformat;
974 	list_add(&flow->l3_to_l2_reformat, &d->flows);
975 	mutex_unlock(&esw->offloads.decap_tbl_lock);
976 	return 0;
977 
978 out_free:
979 	mutex_unlock(&esw->offloads.decap_tbl_lock);
980 	mlx5e_decap_put(priv, d);
981 	return err;
982 
983 out_err:
984 	mutex_unlock(&esw->offloads.decap_tbl_lock);
985 	return err;
986 }
987 
988 static int cmp_route_info(struct mlx5e_route_key *a,
989 			  struct mlx5e_route_key *b)
990 {
991 	if (a->ip_version == 4 && b->ip_version == 4)
992 		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
993 			      sizeof(a->endpoint_ip.v4));
994 	else if (a->ip_version == 6 && b->ip_version == 6)
995 		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
996 			      sizeof(a->endpoint_ip.v6));
997 	return 1;
998 }
999 
1000 static u32 hash_route_info(struct mlx5e_route_key *key)
1001 {
1002 	if (key->ip_version == 4)
1003 		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1004 	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1005 }
1006 
1007 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1008 				struct mlx5e_route_entry *r)
1009 {
1010 	WARN_ON(!list_empty(&r->decap_flows));
1011 	WARN_ON(!list_empty(&r->encap_entries));
1012 
1013 	kfree_rcu(r, rcu);
1014 }
1015 
1016 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1017 {
1018 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1019 
1020 	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1021 		return;
1022 
1023 	hash_del_rcu(&r->hlist);
1024 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1025 
1026 	mlx5e_route_dealloc(priv, r);
1027 }
1028 
1029 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1030 {
1031 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1032 
1033 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1034 
1035 	if (!refcount_dec_and_test(&r->refcnt))
1036 		return;
1037 	hash_del_rcu(&r->hlist);
1038 	mlx5e_route_dealloc(priv, r);
1039 }
1040 
1041 static struct mlx5e_route_entry *
1042 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1043 		u32 hash_key)
1044 {
1045 	struct mlx5e_route_key r_key;
1046 	struct mlx5e_route_entry *r;
1047 
1048 	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1049 		r_key = r->key;
1050 		if (!cmp_route_info(&r_key, key) &&
1051 		    refcount_inc_not_zero(&r->refcnt))
1052 			return r;
1053 	}
1054 	return NULL;
1055 }
1056 
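/* Look up a route entry by endpoint IP, taking a reference if one exists; an
 * existing but invalid entry yields -EINVAL. Otherwise allocate a new entry,
 * mark it valid and insert it into route_tbl. On creation,
 * *route_tbl_change_time is set to the route table's last update time so the
 * caller can detect FIB events that raced with it.
 */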
1057 static struct mlx5e_route_entry *
1058 mlx5e_route_get_create(struct mlx5e_priv *priv,
1059 		       struct mlx5e_route_key *key,
1060 		       int tunnel_dev_index,
1061 		       unsigned long *route_tbl_change_time)
1062 {
1063 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1064 	struct mlx5_rep_uplink_priv *uplink_priv;
1065 	struct mlx5e_rep_priv *uplink_rpriv;
1066 	struct mlx5e_tc_tun_encap *encap;
1067 	struct mlx5e_route_entry *r;
1068 	u32 hash_key;
1069 
1070 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1071 	uplink_priv = &uplink_rpriv->uplink_priv;
1072 	encap = uplink_priv->encap;
1073 
1074 	hash_key = hash_route_info(key);
1075 	spin_lock_bh(&encap->route_lock);
1076 	r = mlx5e_route_get(encap, key, hash_key);
1077 	spin_unlock_bh(&encap->route_lock);
1078 	if (r) {
1079 		if (!mlx5e_route_entry_valid(r)) {
1080 			mlx5e_route_put_locked(priv, r);
1081 			return ERR_PTR(-EINVAL);
1082 		}
1083 		return r;
1084 	}
1085 
1086 	r = kzalloc(sizeof(*r), GFP_KERNEL);
1087 	if (!r)
1088 		return ERR_PTR(-ENOMEM);
1089 
1090 	r->key = *key;
1091 	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1092 	r->tunnel_dev_index = tunnel_dev_index;
1093 	refcount_set(&r->refcnt, 1);
1094 	INIT_LIST_HEAD(&r->decap_flows);
1095 	INIT_LIST_HEAD(&r->encap_entries);
1096 
1097 	spin_lock_bh(&encap->route_lock);
1098 	*route_tbl_change_time = encap->route_tbl_last_update;
1099 	hash_add(encap->route_tbl, &r->hlist, hash_key);
1100 	spin_unlock_bh(&encap->route_lock);
1101 
1102 	return r;
1103 }
1104 
1105 static struct mlx5e_route_entry *
1106 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1107 {
1108 	u32 hash_key = hash_route_info(key);
1109 	struct mlx5e_route_entry *r;
1110 
1111 	spin_lock_bh(&encap->route_lock);
1112 	encap->route_tbl_last_update = jiffies;
1113 	r = mlx5e_route_get(encap, key, hash_key);
1114 	spin_unlock_bh(&encap->route_lock);
1115 
1116 	return r;
1117 }
1118 
1119 struct mlx5e_tc_fib_event_data {
1120 	struct work_struct work;
1121 	unsigned long event;
1122 	struct mlx5e_route_entry *r;
1123 	struct net_device *ul_dev;
1124 };
1125 
1126 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1127 static struct mlx5e_tc_fib_event_data *
1128 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1129 {
1130 	struct mlx5e_tc_fib_event_data *fib_work;
1131 
1132 	fib_work = kzalloc(sizeof(*fib_work), flags);
1133 	if (WARN_ON(!fib_work))
1134 		return NULL;
1135 
1136 	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1137 	fib_work->event = event;
1138 	fib_work->ul_dev = ul_dev;
1139 
1140 	return fib_work;
1141 }
1142 
1143 static int
1144 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1145 			   struct mlx5e_route_entry *r,
1146 			   unsigned long event)
1147 {
1148 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1149 	struct mlx5e_tc_fib_event_data *fib_work;
1150 	struct mlx5e_rep_priv *uplink_rpriv;
1151 	struct net_device *ul_dev;
1152 
1153 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1154 	ul_dev = uplink_rpriv->netdev;
1155 
1156 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1157 	if (!fib_work)
1158 		return -ENOMEM;
1159 
1160 	dev_hold(ul_dev);
1161 	refcount_inc(&r->refcnt);
1162 	fib_work->r = r;
1163 	queue_work(priv->wq, &fib_work->work);
1164 
1165 	return 0;
1166 }
1167 
1168 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1169 			     struct mlx5e_tc_flow *flow)
1170 {
1171 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1172 	unsigned long tbl_time_before, tbl_time_after;
1173 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1174 	struct mlx5_flow_attr *attr = flow->attr;
1175 	struct mlx5_esw_flow_attr *esw_attr;
1176 	struct mlx5e_route_entry *r;
1177 	struct mlx5e_route_key key;
1178 	int err = 0;
1179 
1180 	esw_attr = attr->esw_attr;
1181 	parse_attr = attr->parse_attr;
1182 	mutex_lock(&esw->offloads.encap_tbl_lock);
1183 	if (!esw_attr->rx_tun_attr)
1184 		goto out;
1185 
1186 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1187 	tbl_time_after = tbl_time_before;
1188 	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1189 	if (err || !esw_attr->rx_tun_attr->decap_vport)
1190 		goto out;
1191 
1192 	key.ip_version = attr->tun_ip_version;
1193 	if (key.ip_version == 4)
1194 		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1195 	else
1196 		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1197 
1198 	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1199 				   &tbl_time_after);
1200 	if (IS_ERR(r)) {
1201 		err = PTR_ERR(r);
1202 		goto out;
1203 	}
1204 	/* Routing changed concurrently. FIB event handler might have missed new
1205 	 * entry, schedule update.
1206 	 */
1207 	if (tbl_time_before != tbl_time_after) {
1208 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1209 		if (err) {
1210 			mlx5e_route_put_locked(priv, r);
1211 			goto out;
1212 		}
1213 	}
1214 
1215 	flow->decap_route = r;
1216 	list_add(&flow->decap_routes, &r->decap_flows);
1217 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1218 	return 0;
1219 
1220 out:
1221 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1222 	return err;
1223 }
1224 
1225 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1226 				    struct mlx5e_tc_flow *flow,
1227 				    struct mlx5_flow_attr *attr,
1228 				    struct mlx5e_encap_entry *e,
1229 				    bool new_encap_entry,
1230 				    unsigned long tbl_time_before,
1231 				    int out_index)
1232 {
1233 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1234 	unsigned long tbl_time_after = tbl_time_before;
1235 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1236 	const struct ip_tunnel_info *tun_info;
1237 	struct mlx5_esw_flow_attr *esw_attr;
1238 	struct mlx5e_route_entry *r;
1239 	struct mlx5e_route_key key;
1240 	unsigned short family;
1241 	int err = 0;
1242 
1243 	esw_attr = attr->esw_attr;
1244 	parse_attr = attr->parse_attr;
1245 	tun_info = parse_attr->tun_info[out_index];
1246 	family = ip_tunnel_info_af(tun_info);
1247 
1248 	if (family == AF_INET) {
1249 		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1250 		key.ip_version = 4;
1251 	} else if (family == AF_INET6) {
1252 		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1253 		key.ip_version = 6;
1254 	}
1255 
1256 	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1257 				  e->route_dev_ifindex, out_index);
1258 	if (err || !(esw_attr->dests[out_index].flags &
1259 		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1260 		return err;
1261 
1262 	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1263 				   &tbl_time_after);
1264 	if (IS_ERR(r))
1265 		return PTR_ERR(r);
1266 	/* Routing changed concurrently. FIB event handler might have missed new
1267 	 * entry, schedule update.
1268 	 */
1269 	if (tbl_time_before != tbl_time_after) {
1270 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1271 		if (err) {
1272 			mlx5e_route_put_locked(priv, r);
1273 			return err;
1274 		}
1275 	}
1276 
1277 	flow->encap_routes[out_index].r = r;
1278 	if (new_encap_entry)
1279 		list_add(&e->route_list, &r->encap_entries);
1280 	flow->encap_routes[out_index].index = out_index;
1281 	return 0;
1282 }
1283 
1284 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1285 			      struct mlx5e_tc_flow *flow)
1286 {
1287 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1288 	struct mlx5e_route_entry *r = flow->decap_route;
1289 
1290 	if (!r)
1291 		return;
1292 
1293 	mutex_lock(&esw->offloads.encap_tbl_lock);
1294 	list_del(&flow->decap_routes);
1295 	flow->decap_route = NULL;
1296 
1297 	if (!refcount_dec_and_test(&r->refcnt)) {
1298 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1299 		return;
1300 	}
1301 	hash_del_rcu(&r->hlist);
1302 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1303 
1304 	mlx5e_route_dealloc(priv, r);
1305 }
1306 
1307 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1308 				     struct mlx5e_tc_flow *flow,
1309 				     int out_index)
1310 {
1311 	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1312 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1313 	struct mlx5e_encap_entry *e, *tmp;
1314 
1315 	if (!r)
1316 		return;
1317 
1318 	mutex_lock(&esw->offloads.encap_tbl_lock);
1319 	flow->encap_routes[out_index].r = NULL;
1320 
1321 	if (!refcount_dec_and_test(&r->refcnt)) {
1322 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1323 		return;
1324 	}
1325 	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1326 		list_del_init(&e->route_list);
1327 	hash_del_rcu(&r->hlist);
1328 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1329 
1330 	mlx5e_route_dealloc(priv, r);
1331 }
1332 
1333 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1334 				   struct mlx5e_encap_entry *e,
1335 				   struct list_head *encap_flows)
1336 {
1337 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1338 	struct mlx5e_tc_flow *flow;
1339 
1340 	list_for_each_entry(flow, encap_flows, tmp_list) {
1341 		struct mlx5_flow_attr *attr = flow->attr;
1342 		struct mlx5_esw_flow_attr *esw_attr;
1343 
1344 		if (!mlx5e_is_offloaded_flow(flow))
1345 			continue;
1346 		esw_attr = attr->esw_attr;
1347 
1348 		if (flow_flag_test(flow, SLOW))
1349 			mlx5e_tc_unoffload_from_slow_path(esw, flow);
1350 		else
1351 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1352 		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
1353 		attr->modify_hdr = NULL;
1354 
1355 		esw_attr->dests[flow->tmp_entry_index].flags &=
1356 			~MLX5_ESW_DEST_ENCAP_VALID;
1357 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1358 	}
1359 
1360 	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1361 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1362 		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1363 		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1364 		e->pkt_reformat = NULL;
1365 	}
1366 }
1367 
1368 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1369 				  struct net_device *tunnel_dev,
1370 				  struct mlx5e_encap_entry *e,
1371 				  struct list_head *encap_flows)
1372 {
1373 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1374 	struct mlx5e_tc_flow *flow;
1375 	int err;
1376 
1377 	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1378 		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1379 		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1380 	if (err)
1381 		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1382 	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1383 
1384 	list_for_each_entry(flow, encap_flows, tmp_list) {
1385 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1386 		struct mlx5_esw_flow_attr *esw_attr;
1387 		struct mlx5_flow_handle *rule;
1388 		struct mlx5_flow_attr *attr;
1389 		struct mlx5_flow_spec *spec;
1390 
1391 		if (flow_flag_test(flow, FAILED))
1392 			continue;
1393 
1394 		spec = &flow->attr->parse_attr->spec;
1395 
1396 		attr = mlx5e_tc_get_encap_attr(flow);
1397 		esw_attr = attr->esw_attr;
1398 		parse_attr = attr->parse_attr;
1399 
1400 		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1401 					     e->out_dev, e->route_dev_ifindex,
1402 					     flow->tmp_entry_index);
1403 		if (err) {
1404 			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1405 			continue;
1406 		}
1407 
1408 		err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
1409 		if (err) {
1410 			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1411 				       err);
1412 			continue;
1413 		}
1414 
1415 		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1416 			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1417 			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1418 			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1419 				goto offload_to_slow_path;
1420 
1421 			err = mlx5e_tc_offload_flow_post_acts(flow);
1422 			if (err) {
1423 				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1424 					       err);
1425 				goto offload_to_slow_path;
1426 			}
1427 
1428 			/* update from slow path rule to encap rule */
1429 			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1430 			if (IS_ERR(rule)) {
1431 				mlx5e_tc_unoffload_flow_post_acts(flow);
1432 				err = PTR_ERR(rule);
1433 				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1434 					       err);
1435 			} else {
1436 				flow->rule[0] = rule;
1437 			}
1438 		} else {
1439 offload_to_slow_path:
1440 			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1441 			/* mark the flow's encap dest as non-valid */
1442 			esw_attr->dests[flow->tmp_entry_index].flags &=
1443 				~MLX5_ESW_DEST_ENCAP_VALID;
1444 
1445 			if (IS_ERR(rule)) {
1446 				err = PTR_ERR(rule);
1447 				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1448 					       err);
1449 			} else {
1450 				flow->rule[0] = rule;
1451 			}
1452 		}
1453 		flow_flag_set(flow, OFFLOADED);
1454 	}
1455 }
1456 
1457 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1458 				     struct mlx5e_route_entry *r,
1459 				     struct list_head *flow_list,
1460 				     bool replace)
1461 {
1462 	struct net_device *tunnel_dev;
1463 	struct mlx5e_encap_entry *e;
1464 
1465 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1466 	if (!tunnel_dev)
1467 		return -ENODEV;
1468 
1469 	list_for_each_entry(e, &r->encap_entries, route_list) {
1470 		LIST_HEAD(encap_flows);
1471 
1472 		mlx5e_take_all_encap_flows(e, &encap_flows);
1473 		if (list_empty(&encap_flows))
1474 			continue;
1475 
1476 		if (mlx5e_route_entry_valid(r))
1477 			mlx5e_invalidate_encap(priv, e, &encap_flows);
1478 
1479 		if (!replace) {
1480 			list_splice(&encap_flows, flow_list);
1481 			continue;
1482 		}
1483 
1484 		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1485 		list_splice(&encap_flows, flow_list);
1486 	}
1487 
1488 	return 0;
1489 }
1490 
1491 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1492 				      struct list_head *flow_list)
1493 {
1494 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1495 	struct mlx5e_tc_flow *flow;
1496 
1497 	list_for_each_entry(flow, flow_list, tmp_list)
1498 		if (mlx5e_is_offloaded_flow(flow))
1499 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1500 }
1501 
1502 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1503 				  struct list_head *decap_flows)
1504 {
1505 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1506 	struct mlx5e_tc_flow *flow;
1507 
1508 	list_for_each_entry(flow, decap_flows, tmp_list) {
1509 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1510 		struct mlx5_flow_attr *attr = flow->attr;
1511 		struct mlx5_flow_handle *rule;
1512 		struct mlx5_flow_spec *spec;
1513 		int err;
1514 
1515 		if (flow_flag_test(flow, FAILED))
1516 			continue;
1517 
1518 		parse_attr = attr->parse_attr;
1519 		spec = &parse_attr->spec;
1520 		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1521 		if (err) {
1522 			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1523 				       err);
1524 			continue;
1525 		}
1526 
1527 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1528 		if (IS_ERR(rule)) {
1529 			err = PTR_ERR(rule);
1530 			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1531 				       err);
1532 		} else {
1533 			flow->rule[0] = rule;
1534 			flow_flag_set(flow, OFFLOADED);
1535 		}
1536 	}
1537 }
1538 
1539 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1540 					  struct mlx5e_route_entry *r,
1541 					  struct list_head *flow_list,
1542 					  bool replace)
1543 {
1544 	struct net_device *tunnel_dev;
1545 	LIST_HEAD(decap_flows);
1546 
1547 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1548 	if (!tunnel_dev)
1549 		return -ENODEV;
1550 
1551 	mlx5e_take_all_route_decap_flows(r, &decap_flows);
1552 	if (mlx5e_route_entry_valid(r))
1553 		mlx5e_unoffload_flow_list(priv, &decap_flows);
1554 	if (replace)
1555 		mlx5e_reoffload_decap(priv, &decap_flows);
1556 
1557 	list_splice(&decap_flows, flow_list);
1558 
1559 	return 0;
1560 }
1561 
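/* Work handler for FIB_EVENT_ENTRY_REPLACE/DEL notifications. Under rtnl and
 * encap_tbl_lock it updates all encap entries and decap flows attached to the
 * route entry (re-offloading them on replace, moving them off the fast path
 * on delete), then releases the references taken when the work was queued.
 */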
1562 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1563 {
1564 	struct mlx5e_tc_fib_event_data *event_data =
1565 		container_of(work, struct mlx5e_tc_fib_event_data, work);
1566 	struct net_device *ul_dev = event_data->ul_dev;
1567 	struct mlx5e_priv *priv = netdev_priv(ul_dev);
1568 	struct mlx5e_route_entry *r = event_data->r;
1569 	struct mlx5_eswitch *esw;
1570 	LIST_HEAD(flow_list);
1571 	bool replace;
1572 	int err;
1573 
1574 	/* sync with concurrent neigh updates */
1575 	rtnl_lock();
1576 	esw = priv->mdev->priv.eswitch;
1577 	mutex_lock(&esw->offloads.encap_tbl_lock);
1578 	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1579 
1580 	if (!mlx5e_route_entry_valid(r) && !replace)
1581 		goto out;
1582 
1583 	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1584 	if (err)
1585 		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1586 			       err);
1587 
1588 	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1589 	if (err)
1590 		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1591 			       err);
1592 
1593 	if (replace)
1594 		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1595 out:
1596 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1597 	rtnl_unlock();
1598 
1599 	mlx5e_put_flow_list(priv, &flow_list);
1600 	mlx5e_route_put(priv, event_data->r);
1601 	dev_put(event_data->ul_dev);
1602 	kfree(event_data);
1603 }
1604 
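/* Runs in atomic (notifier) context: ignore events that do not target a /32
 * route through an mlx5e device, allocate the work item with GFP_ATOMIC and
 * look up the affected route entry, taking a reference that the work handler
 * releases.
 */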
1605 static struct mlx5e_tc_fib_event_data *
1606 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1607 			 struct net_device *ul_dev,
1608 			 struct mlx5e_tc_tun_encap *encap,
1609 			 unsigned long event,
1610 			 struct fib_notifier_info *info)
1611 {
1612 	struct fib_entry_notifier_info *fen_info;
1613 	struct mlx5e_tc_fib_event_data *fib_work;
1614 	struct mlx5e_route_entry *r;
1615 	struct mlx5e_route_key key;
1616 	struct net_device *fib_dev;
1617 
1618 	fen_info = container_of(info, struct fib_entry_notifier_info, info);
1619 	if (fen_info->fi->nh)
1620 		return NULL;
1621 	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1622 	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1623 	    fen_info->dst_len != 32)
1624 		return NULL;
1625 
1626 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1627 	if (!fib_work)
1628 		return ERR_PTR(-ENOMEM);
1629 
1630 	key.endpoint_ip.v4 = htonl(fen_info->dst);
1631 	key.ip_version = 4;
1632 
1633 	/* Can't fail after this point because releasing reference to r
1634 	 * requires obtaining sleeping mutex which we can't do in atomic
1635 	 * context.
1636 	 */
1637 	r = mlx5e_route_lookup_for_update(encap, &key);
1638 	if (!r)
1639 		goto out;
1640 	fib_work->r = r;
1641 	dev_hold(ul_dev);
1642 
1643 	return fib_work;
1644 
1645 out:
1646 	kfree(fib_work);
1647 	return NULL;
1648 }
1649 
1650 static struct mlx5e_tc_fib_event_data *
1651 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1652 			 struct net_device *ul_dev,
1653 			 struct mlx5e_tc_tun_encap *encap,
1654 			 unsigned long event,
1655 			 struct fib_notifier_info *info)
1656 {
1657 	struct fib6_entry_notifier_info *fen_info;
1658 	struct mlx5e_tc_fib_event_data *fib_work;
1659 	struct mlx5e_route_entry *r;
1660 	struct mlx5e_route_key key;
1661 	struct net_device *fib_dev;
1662 
1663 	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1664 	fib_dev = fib6_info_nh_dev(fen_info->rt);
1665 	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1666 	    fen_info->rt->fib6_dst.plen != 128)
1667 		return NULL;
1668 
1669 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1670 	if (!fib_work)
1671 		return ERR_PTR(-ENOMEM);
1672 
1673 	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1674 	       sizeof(fen_info->rt->fib6_dst.addr));
1675 	key.ip_version = 6;
1676 
1677 	/* Can't fail after this point because releasing reference to r
1678 	 * requires obtaining sleeping mutex which we can't do in atomic
1679 	 * context.
1680 	 */
1681 	r = mlx5e_route_lookup_for_update(encap, &key);
1682 	if (!r)
1683 		goto out;
1684 	fib_work->r = r;
1685 	dev_hold(ul_dev);
1686 
1687 	return fib_work;
1688 
1689 out:
1690 	kfree(fib_work);
1691 	return NULL;
1692 }
1693 
1694 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1695 {
1696 	struct mlx5e_tc_fib_event_data *fib_work;
1697 	struct fib_notifier_info *info = ptr;
1698 	struct mlx5e_tc_tun_encap *encap;
1699 	struct net_device *ul_dev;
1700 	struct mlx5e_priv *priv;
1701 
1702 	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1703 	priv = encap->priv;
1704 	ul_dev = priv->netdev;
1705 	priv = netdev_priv(ul_dev);
1706 
1707 	switch (event) {
1708 	case FIB_EVENT_ENTRY_REPLACE:
1709 	case FIB_EVENT_ENTRY_DEL:
1710 		if (info->family == AF_INET)
1711 			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1712 		else if (info->family == AF_INET6)
1713 			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1714 		else
1715 			return NOTIFY_DONE;
1716 
1717 		if (!IS_ERR_OR_NULL(fib_work)) {
1718 			queue_work(priv->wq, &fib_work->work);
1719 		} else if (IS_ERR(fib_work)) {
1720 			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1721 			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1722 				       PTR_ERR(fib_work));
1723 		}
1724 
1725 		break;
1726 	default:
1727 		return NOTIFY_DONE;
1728 	}
1729 
1730 	return NOTIFY_DONE;
1731 }
1732 
1733 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1734 {
1735 	struct mlx5e_tc_tun_encap *encap;
1736 	int err;
1737 
1738 	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1739 	if (!encap)
1740 		return ERR_PTR(-ENOMEM);
1741 
1742 	encap->priv = priv;
1743 	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1744 	spin_lock_init(&encap->route_lock);
1745 	hash_init(encap->route_tbl);
1746 	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1747 				    NULL, NULL);
1748 	if (err) {
1749 		kvfree(encap);
1750 		return ERR_PTR(err);
1751 	}
1752 
1753 	return encap;
1754 }
1755 
1756 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1757 {
1758 	if (!encap)
1759 		return;
1760 
1761 	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1762 	flush_workqueue(encap->priv->wq); /* flush fib event works */
1763 	kvfree(encap);
1764 }
1765