1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include "tc_tun_encap.h"
7 #include "en_tc.h"
8 #include "tc_tun.h"
9 #include "rep/tc.h"
10 #include "diag/en_tc_tracepoint.h"
11
12 enum {
13 MLX5E_ROUTE_ENTRY_VALID = BIT(0),
14 };
15
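/* If the encap route device is an OVS master device, set up forwarding of the
 * flow to the corresponding internal port on egress; otherwise the destination
 * is left untouched and 0 is returned.
 */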
16 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
17 struct mlx5_flow_attr *attr,
18 struct mlx5e_encap_entry *e,
19 int out_index)
20 {
21 struct net_device *route_dev;
22 int err = 0;
23
24 route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
25
26 if (!route_dev || !netif_is_ovs_master(route_dev))
27 goto out;
28
29 err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
30 MLX5E_TC_INT_PORT_EGRESS,
31 &attr->action, out_index);
32
33 out:
34 if (route_dev)
35 dev_put(route_dev);
36
37 return err;
38 }
39
40 struct mlx5e_route_key {
41 int ip_version;
42 union {
43 __be32 v4;
44 struct in6_addr v6;
45 } endpoint_ip;
46 };
47
48 struct mlx5e_route_entry {
49 struct mlx5e_route_key key;
50 struct list_head encap_entries;
51 struct list_head decap_flows;
52 u32 flags;
53 struct hlist_node hlist;
54 refcount_t refcnt;
55 int tunnel_dev_index;
56 struct rcu_head rcu;
57 };
58
59 struct mlx5e_tc_tun_encap {
60 struct mlx5e_priv *priv;
61 struct notifier_block fib_nb;
62 spinlock_t route_lock; /* protects route_tbl */
63 unsigned long route_tbl_last_update;
64 DECLARE_HASHTABLE(route_tbl, 8);
65 };
66
67 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
68 {
69 return r->flags & MLX5E_ROUTE_ENTRY_VALID;
70 }
71
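/* Copy the outer IPv4/IPv6 source and destination addresses from the match
 * spec into the flow's rx_tun_attr. The TUN_RX flag is only set when both
 * addresses are present, since both are required for the later route lookup.
 */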
72 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
73 struct mlx5_flow_spec *spec)
74 {
75 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
76 struct mlx5_rx_tun_attr *tun_attr;
77 void *daddr, *saddr;
78 u8 ip_version;
79
80 tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
81 if (!tun_attr)
82 return -ENOMEM;
83
84 esw_attr->rx_tun_attr = tun_attr;
85 ip_version = mlx5e_tc_get_ip_version(spec, true);
86
87 if (ip_version == 4) {
88 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
89 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
90 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
91 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
92 tun_attr->dst_ip.v4 = *(__be32 *)daddr;
93 tun_attr->src_ip.v4 = *(__be32 *)saddr;
94 if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
95 return 0;
96 }
97 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
98 else if (ip_version == 6) {
99 int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
100 struct in6_addr zerov6 = {};
101
102 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
103 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
104 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
105 outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
106 memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
107 memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
108 if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
109 !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
110 return 0;
111 }
112 #endif
113 /* Only set the flag if both src and dst ip addresses exist. They are
114 * required to establish routing.
115 */
116 flow_flag_set(flow, TUN_RX);
117 flow->attr->tun_ip_version = ip_version;
118 return 0;
119 }
120
121 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
122 {
123 bool all_flow_encaps_valid = true;
124 int i;
125
126 /* Flow can be associated with multiple encap entries.
127 * Before offloading the flow verify that all of them have
128 * a valid neighbour.
129 */
130 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
131 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
132 continue;
133 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
134 all_flow_encaps_valid = false;
135 break;
136 }
137 }
138
139 return all_flow_encaps_valid;
140 }
141
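/* Called when the encap entry's neighbour becomes valid: allocate the packet
 * reformat context for the cached encap header and move the flows in flow_list
 * that currently sit on the slow path over to their offloaded encap rules.
 */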
142 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
143 struct mlx5e_encap_entry *e,
144 struct list_head *flow_list)
145 {
146 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
147 struct mlx5_pkt_reformat_params reformat_params;
148 struct mlx5_esw_flow_attr *esw_attr;
149 struct mlx5_flow_handle *rule;
150 struct mlx5_flow_attr *attr;
151 struct mlx5_flow_spec *spec;
152 struct mlx5e_tc_flow *flow;
153 int err;
154
155 if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
156 return;
157
158 memset(&reformat_params, 0, sizeof(reformat_params));
159 reformat_params.type = e->reformat_type;
160 reformat_params.size = e->encap_size;
161 reformat_params.data = e->encap_header;
162 e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
163 &reformat_params,
164 MLX5_FLOW_NAMESPACE_FDB);
165 if (IS_ERR(e->pkt_reformat)) {
166 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
167 PTR_ERR(e->pkt_reformat));
168 return;
169 }
170 e->flags |= MLX5_ENCAP_ENTRY_VALID;
171 mlx5e_rep_queue_neigh_stats_work(priv);
172
173 list_for_each_entry(flow, flow_list, tmp_list) {
174 if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
175 continue;
176
177 spec = &flow->attr->parse_attr->spec;
178
179 attr = mlx5e_tc_get_encap_attr(flow);
180 esw_attr = attr->esw_attr;
181 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
182 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
183
184 /* Do not offload flows with unresolved neighbors */
185 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
186 continue;
187
188 err = mlx5e_tc_offload_flow_post_acts(flow);
189 if (err) {
190 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
191 err);
192 continue;
193 }
194
195 /* update from slow path rule to encap rule */
196 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
197 if (IS_ERR(rule)) {
198 mlx5e_tc_unoffload_flow_post_acts(flow);
199 err = PTR_ERR(rule);
200 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
201 err);
202 continue;
203 }
204
205 mlx5e_tc_unoffload_from_slow_path(esw, flow);
206 flow->rule[0] = rule;
207 /* was unset when slow path rule removed */
208 flow_flag_set(flow, OFFLOADED);
209 }
210 }
211
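/* Counterpart of mlx5e_tc_encap_flows_add(): mark the encap destination of
 * each offloaded flow as invalid, move the flows back to the slow path and
 * release the encap entry's packet reformat context.
 */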
212 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
213 struct mlx5e_encap_entry *e,
214 struct list_head *flow_list)
215 {
216 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
217 struct mlx5_esw_flow_attr *esw_attr;
218 struct mlx5_flow_handle *rule;
219 struct mlx5_flow_attr *attr;
220 struct mlx5_flow_spec *spec;
221 struct mlx5e_tc_flow *flow;
222 int err;
223
224 list_for_each_entry(flow, flow_list, tmp_list) {
225 if (!mlx5e_is_offloaded_flow(flow))
226 continue;
227
228 attr = mlx5e_tc_get_encap_attr(flow);
229 esw_attr = attr->esw_attr;
230 /* mark the flow's encap dest as non-valid */
231 esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
232 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
233
234 /* Clear pkt_reformat before checking the slow path flag: a flow
235 * that was already moved to the slow path in a previous iteration
236 * still needs its pkt_reformat cleared here.
237 */
238 if (flow_flag_test(flow, SLOW))
239 continue;
240
241 /* update from encap rule to slow path rule */
242 spec = &flow->attr->parse_attr->spec;
243 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
244
245 if (IS_ERR(rule)) {
246 err = PTR_ERR(rule);
247 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
248 err);
249 continue;
250 }
251
252 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
253 mlx5e_tc_unoffload_flow_post_acts(flow);
254 flow->rule[0] = rule;
255 /* was unset when fast path rule removed */
256 flow_flag_set(flow, OFFLOADED);
257 }
258
259 /* The encap entry was valid until now; clear the flag and release its packet reformat context. */
260 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
261 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
262 e->pkt_reformat = NULL;
263 }
264
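/* Take a temporary reference to the flow and add it to flow_list. If the flow
 * is already being deleted, just wait until its hardware rules are removed so
 * that the caller can safely delete the encap entry afterwards.
 */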
265 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
266 struct list_head *flow_list,
267 int index)
268 {
269 if (IS_ERR(mlx5e_flow_get(flow))) {
270 /* Flow is being deleted concurrently. Wait for it to be
271 * unoffloaded from hardware, otherwise deleting encap will
272 * fail.
273 */
274 wait_for_completion(&flow->del_hw_done);
275 return;
276 }
277 wait_for_completion(&flow->init_done);
278
279 flow->tmp_entry_index = index;
280 list_add(&flow->tmp_list, flow_list);
281 }
282
283 /* Takes reference to all flows attached to encap and adds the flows to
284 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
285 */
286 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
287 {
288 struct encap_flow_item *efi;
289 struct mlx5e_tc_flow *flow;
290
291 list_for_each_entry(efi, &e->flows, list) {
292 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
293 mlx5e_take_tmp_flow(flow, flow_list, efi->index);
294 }
295 }
296
297 /* Takes reference to all flows attached to route and adds the flows to
298 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
299 */
300 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
301 struct list_head *flow_list)
302 {
303 struct mlx5e_tc_flow *flow;
304
305 list_for_each_entry(flow, &r->decap_flows, decap_routes)
306 mlx5e_take_tmp_flow(flow, flow_list, 0);
307 }
308
309 typedef bool (match_cb)(struct mlx5e_encap_entry *);
310
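/* RCU iterator over nhe->encap_list: return the next encap entry (after 'e',
 * or the first one when 'e' is NULL) that can be taken and satisfies 'match',
 * releasing the previously returned entry. A usage sketch, mirroring the
 * callers below:
 *
 *	e = NULL;
 *	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
 *		... use e ...
 *	}
 *
 * A caller that stops iterating early must release the current entry itself,
 * as done in mlx5e_tc_update_neigh_used_value().
 */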
311 static struct mlx5e_encap_entry *
312 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
313 struct mlx5e_encap_entry *e,
314 match_cb match)
315 {
316 struct mlx5e_encap_entry *next = NULL;
317
318 retry:
319 rcu_read_lock();
320
321 /* find encap with non-zero reference counter value */
322 for (next = e ?
323 list_next_or_null_rcu(&nhe->encap_list,
324 &e->encap_list,
325 struct mlx5e_encap_entry,
326 encap_list) :
327 list_first_or_null_rcu(&nhe->encap_list,
328 struct mlx5e_encap_entry,
329 encap_list);
330 next;
331 next = list_next_or_null_rcu(&nhe->encap_list,
332 &next->encap_list,
333 struct mlx5e_encap_entry,
334 encap_list))
335 if (mlx5e_encap_take(next))
336 break;
337
338 rcu_read_unlock();
339
340 /* release starting encap */
341 if (e)
342 mlx5e_encap_put(netdev_priv(e->out_dev), e);
343 if (!next)
344 return next;
345
346 /* wait for encap to be fully initialized */
347 wait_for_completion(&next->res_ready);
348 /* continue searching if encap entry is not in valid state after completion */
349 if (!match(next)) {
350 e = next;
351 goto retry;
352 }
353
354 return next;
355 }
356
357 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
358 {
359 return e->flags & MLX5_ENCAP_ENTRY_VALID;
360 }
361
362 static struct mlx5e_encap_entry *
363 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
364 struct mlx5e_encap_entry *e)
365 {
366 return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
367 }
368
369 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
370 {
371 return e->compl_result >= 0;
372 }
373
374 struct mlx5e_encap_entry *
375 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
376 struct mlx5e_encap_entry *e)
377 {
378 return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
379 }
380
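/* Walk all valid encap entries of the neigh hash entry and check whether any
 * offloaded flow's counter was updated since the last report. If so, refresh
 * reported_lastuse and send a neigh event so the neighbour is not garbage
 * collected while hardware flows still use it.
 */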
381 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
382 {
383 struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
384 struct mlx5e_encap_entry *e = NULL;
385 struct mlx5e_tc_flow *flow;
386 struct mlx5_fc *counter;
387 struct neigh_table *tbl;
388 bool neigh_used = false;
389 struct neighbour *n;
390 u64 lastuse;
391
392 if (m_neigh->family == AF_INET)
393 tbl = &arp_tbl;
394 #if IS_ENABLED(CONFIG_IPV6)
395 else if (m_neigh->family == AF_INET6)
396 tbl = ipv6_stub->nd_tbl;
397 #endif
398 else
399 return;
400
401 /* mlx5e_get_next_valid_encap() releases previous encap before returning
402 * next one.
403 */
404 while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
405 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
406 struct encap_flow_item *efi, *tmp;
407 struct mlx5_eswitch *esw;
408 LIST_HEAD(flow_list);
409
410 esw = priv->mdev->priv.eswitch;
411 mutex_lock(&esw->offloads.encap_tbl_lock);
412 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
413 flow = container_of(efi, struct mlx5e_tc_flow,
414 encaps[efi->index]);
415 if (IS_ERR(mlx5e_flow_get(flow)))
416 continue;
417 list_add(&flow->tmp_list, &flow_list);
418
419 if (mlx5e_is_offloaded_flow(flow)) {
420 counter = mlx5e_tc_get_counter(flow);
421 lastuse = mlx5_fc_query_lastuse(counter);
422 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
423 neigh_used = true;
424 break;
425 }
426 }
427 }
428 mutex_unlock(&esw->offloads.encap_tbl_lock);
429
430 mlx5e_put_flow_list(priv, &flow_list);
431 if (neigh_used) {
432 /* release current encap before breaking the loop */
433 mlx5e_encap_put(priv, e);
434 break;
435 }
436 }
437
438 trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
439
440 if (neigh_used) {
441 nhe->reported_lastuse = jiffies;
442
443 /* find the relevant neigh according to the cached device and
444 * dst ip pair
445 */
446 n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
447 if (!n)
448 return;
449
450 neigh_event_send(n, NULL);
451 neigh_release(n);
452 }
453 }
454
455 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
456 {
457 WARN_ON(!list_empty(&e->flows));
458
459 if (e->compl_result > 0) {
460 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
461
462 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
463 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
464 }
465
466 kfree(e->tun_info);
467 kfree(e->encap_header);
468 kfree_rcu(e, rcu);
469 }
470
471 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
472 struct mlx5e_decap_entry *d)
473 {
474 WARN_ON(!list_empty(&d->flows));
475
476 if (!d->compl_result)
477 mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
478
479 kfree_rcu(d, rcu);
480 }
481
482 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
483 {
484 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
485
486 if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
487 return;
488 list_del(&e->route_list);
489 hash_del_rcu(&e->encap_hlist);
490 mutex_unlock(&esw->offloads.encap_tbl_lock);
491
492 mlx5e_encap_dealloc(priv, e);
493 }
494
495 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
496 {
497 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
498
499 if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
500 return;
501 hash_del_rcu(&d->hlist);
502 mutex_unlock(&esw->offloads.decap_tbl_lock);
503
504 mlx5e_decap_dealloc(priv, d);
505 }
506
507 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
508 struct mlx5e_tc_flow *flow,
509 int out_index);
510
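/* Detach the flow from its encap entry at out_index (and from the encap route
 * when source port rewrite was used). The encap entry itself is freed once the
 * last reference to it is dropped.
 */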
511 void mlx5e_detach_encap(struct mlx5e_priv *priv,
512 struct mlx5e_tc_flow *flow,
513 struct mlx5_flow_attr *attr,
514 int out_index)
515 {
516 struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
517 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
518
519 if (!mlx5e_is_eswitch_flow(flow))
520 return;
521
522 if (attr->esw_attr->dests[out_index].flags &
523 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
524 mlx5e_detach_encap_route(priv, flow, out_index);
525
526 /* flow wasn't fully initialized */
527 if (!e)
528 return;
529
530 mutex_lock(&esw->offloads.encap_tbl_lock);
531 list_del(&flow->encaps[out_index].list);
532 flow->encaps[out_index].e = NULL;
533 if (!refcount_dec_and_test(&e->refcnt)) {
534 mutex_unlock(&esw->offloads.encap_tbl_lock);
535 return;
536 }
537 list_del(&e->route_list);
538 hash_del_rcu(&e->encap_hlist);
539 mutex_unlock(&esw->offloads.encap_tbl_lock);
540
541 mlx5e_encap_dealloc(priv, e);
542 }
543
544 void mlx5e_detach_decap(struct mlx5e_priv *priv,
545 struct mlx5e_tc_flow *flow)
546 {
547 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
548 struct mlx5e_decap_entry *d = flow->decap_reformat;
549
550 if (!d)
551 return;
552
553 mutex_lock(&esw->offloads.decap_tbl_lock);
554 list_del(&flow->l3_to_l2_reformat);
555 flow->decap_reformat = NULL;
556
557 if (!refcount_dec_and_test(&d->refcnt)) {
558 mutex_unlock(&esw->offloads.decap_tbl_lock);
559 return;
560 }
561 hash_del_rcu(&d->hlist);
562 mutex_unlock(&esw->offloads.decap_tbl_lock);
563
564 mlx5e_decap_dealloc(priv, d);
565 }
566
567 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
568 struct mlx5e_encap_key *b)
569 {
570 return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
571 a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
572 }
573
574 static int cmp_decap_info(struct mlx5e_decap_key *a,
575 struct mlx5e_decap_key *b)
576 {
577 return memcmp(&a->key, &b->key, sizeof(b->key));
578 }
579
580 static int hash_encap_info(struct mlx5e_encap_key *key)
581 {
582 return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
583 key->tc_tunnel->tunnel_type);
584 }
585
586 static int hash_decap_info(struct mlx5e_decap_key *key)
587 {
588 return jhash(&key->key, sizeof(key->key), 0);
589 }
590
591 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
592 {
593 return refcount_inc_not_zero(&e->refcnt);
594 }
595
596 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
597 {
598 return refcount_inc_not_zero(&e->refcnt);
599 }
600
601 static struct mlx5e_encap_entry *
602 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
603 uintptr_t hash_key)
604 {
605 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
606 struct mlx5e_encap_key e_key;
607 struct mlx5e_encap_entry *e;
608
609 hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
610 encap_hlist, hash_key) {
611 e_key.ip_tun_key = &e->tun_info->key;
612 e_key.tc_tunnel = e->tunnel;
613 if (e->tunnel->encap_info_equal(&e_key, key) &&
614 mlx5e_encap_take(e))
615 return e;
616 }
617
618 return NULL;
619 }
620
621 static struct mlx5e_decap_entry *
622 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
623 uintptr_t hash_key)
624 {
625 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
626 struct mlx5e_decap_key r_key;
627 struct mlx5e_decap_entry *e;
628
629 hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
630 hlist, hash_key) {
631 r_key = e->key;
632 if (!cmp_decap_info(&r_key, key) &&
633 mlx5e_decap_take(e))
634 return e;
635 }
636 return NULL;
637 }
638
639 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
640 {
641 size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
642
643 return kmemdup(tun_info, tun_size, GFP_KERNEL);
644 }
645
646 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
647 struct mlx5e_tc_flow *flow,
648 int out_index,
649 struct mlx5e_encap_entry *e,
650 struct netlink_ext_ack *extack)
651 {
652 int i;
653
654 for (i = 0; i < out_index; i++) {
655 if (flow->encaps[i].e != e)
656 continue;
657 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
658 netdev_err(priv->netdev, "can't duplicate encap action\n");
659 return true;
660 }
661
662 return false;
663 }
664
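/* VF tunnel case: if the route device is an mlx5 netdevice and the tunnel is
 * identified as a VF tunnel, append a mod_hdr action that writes the VF's
 * vport metadata (VPORT_TO_REG) and flag the destination with
 * MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE so that encap route tracking is
 * used for this destination.
 */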
665 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
666 struct mlx5_flow_attr *attr,
667 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
668 struct net_device *out_dev,
669 int route_dev_ifindex,
670 int out_index)
671 {
672 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
673 struct net_device *route_dev;
674 u16 vport_num;
675 int err = 0;
676 u32 data;
677
678 route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
679
680 if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
681 !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
682 goto out;
683
684 err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
685 if (err)
686 goto out;
687
688 attr->dest_chain = 0;
689 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
690 esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
691 data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
692 vport_num);
693 err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
694 MLX5_FLOW_NAMESPACE_FDB,
695 VPORT_TO_REG, data);
696 if (err >= 0) {
697 esw_attr->dests[out_index].src_port_rewrite_act_id = err;
698 err = 0;
699 }
700
701 out:
702 if (route_dev)
703 dev_put(route_dev);
704 return err;
705 }
706
707 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
708 struct mlx5_esw_flow_attr *attr,
709 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
710 struct net_device *out_dev,
711 int route_dev_ifindex,
712 int out_index)
713 {
714 int act_id = attr->dests[out_index].src_port_rewrite_act_id;
715 struct net_device *route_dev;
716 u16 vport_num;
717 int err = 0;
718 u32 data;
719
720 route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
721
722 if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
723 !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
724 err = -ENODEV;
725 goto out;
726 }
727
728 err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
729 if (err)
730 goto out;
731
732 data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
733 vport_num);
734 mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
735
736 out:
737 if (route_dev)
738 dev_put(route_dev);
739 return err;
740 }
741
742 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
743 {
744 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
745 struct mlx5_rep_uplink_priv *uplink_priv;
746 struct mlx5e_rep_priv *uplink_rpriv;
747 struct mlx5e_tc_tun_encap *encap;
748 unsigned int ret;
749
750 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
751 uplink_priv = &uplink_rpriv->uplink_priv;
752 encap = uplink_priv->encap;
753
754 spin_lock_bh(&encap->route_lock);
755 ret = encap->route_tbl_last_update;
756 spin_unlock_bh(&encap->route_lock);
757 return ret;
758 }
759
760 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
761 struct mlx5e_tc_flow *flow,
762 struct mlx5_flow_attr *attr,
763 struct mlx5e_encap_entry *e,
764 bool new_encap_entry,
765 unsigned long tbl_time_before,
766 int out_index);
767
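/* Find or create an encap entry for the flow's tunnel key. New entries build
 * their encap header via the IPv4/IPv6 tunnel helpers outside the table lock;
 * concurrent users wait on res_ready. A flow attached to an entry that is not
 * yet valid (unresolved neighbour) is marked SLOW so it is offloaded through
 * the slow path until the neighbour update completes.
 */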
768 int mlx5e_attach_encap(struct mlx5e_priv *priv,
769 struct mlx5e_tc_flow *flow,
770 struct mlx5_flow_attr *attr,
771 struct net_device *mirred_dev,
772 int out_index,
773 struct netlink_ext_ack *extack,
774 struct net_device **encap_dev)
775 {
776 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
777 struct mlx5e_tc_flow_parse_attr *parse_attr;
778 const struct ip_tunnel_info *tun_info;
779 const struct mlx5e_mpls_info *mpls_info;
780 unsigned long tbl_time_before = 0;
781 struct mlx5e_encap_entry *e;
782 struct mlx5e_encap_key key;
783 bool entry_created = false;
784 unsigned short family;
785 uintptr_t hash_key;
786 int err = 0;
787
788 parse_attr = attr->parse_attr;
789 tun_info = parse_attr->tun_info[out_index];
790 mpls_info = &parse_attr->mpls_info[out_index];
791 family = ip_tunnel_info_af(tun_info);
792 key.ip_tun_key = &tun_info->key;
793 key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
794 if (!key.tc_tunnel) {
795 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
796 return -EOPNOTSUPP;
797 }
798
799 hash_key = hash_encap_info(&key);
800
801 mutex_lock(&esw->offloads.encap_tbl_lock);
802 e = mlx5e_encap_get(priv, &key, hash_key);
803
804 /* An existing entry was found; make sure it finished initialization successfully before reusing it. */
805 if (e) {
806 /* Check that entry was not already attached to this flow */
807 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
808 err = -EOPNOTSUPP;
809 goto out_err;
810 }
811
812 mutex_unlock(&esw->offloads.encap_tbl_lock);
813 wait_for_completion(&e->res_ready);
814
815 /* Protect against concurrent neigh update. */
816 mutex_lock(&esw->offloads.encap_tbl_lock);
817 if (e->compl_result < 0) {
818 err = -EREMOTEIO;
819 goto out_err;
820 }
821 goto attach_flow;
822 }
823
824 e = kzalloc(sizeof(*e), GFP_KERNEL);
825 if (!e) {
826 err = -ENOMEM;
827 goto out_err;
828 }
829
830 refcount_set(&e->refcnt, 1);
831 init_completion(&e->res_ready);
832 entry_created = true;
833 INIT_LIST_HEAD(&e->route_list);
834
835 tun_info = mlx5e_dup_tun_info(tun_info);
836 if (!tun_info) {
837 err = -ENOMEM;
838 goto out_err_init;
839 }
840 e->tun_info = tun_info;
841 memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
842 err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
843 if (err)
844 goto out_err_init;
845
846 INIT_LIST_HEAD(&e->flows);
847 hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
848 tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
849 mutex_unlock(&esw->offloads.encap_tbl_lock);
850
851 if (family == AF_INET)
852 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
853 else if (family == AF_INET6)
854 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
855
856 /* Protect against concurrent neigh update. */
857 mutex_lock(&esw->offloads.encap_tbl_lock);
858 complete_all(&e->res_ready);
859 if (err) {
860 e->compl_result = err;
861 goto out_err;
862 }
863 e->compl_result = 1;
864
865 attach_flow:
866 err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
867 tbl_time_before, out_index);
868 if (err)
869 goto out_err;
870
871 err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
872 if (err == -EOPNOTSUPP) {
873 /* If device doesn't support int port offload,
874 * redirect to uplink vport.
875 */
876 mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
877 err = 0;
878 } else if (err) {
879 goto out_err;
880 }
881
882 flow->encaps[out_index].e = e;
883 list_add(&flow->encaps[out_index].list, &e->flows);
884 flow->encaps[out_index].index = out_index;
885 *encap_dev = e->out_dev;
886 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
887 attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
888 attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
889 } else {
890 flow_flag_set(flow, SLOW);
891 }
892 mutex_unlock(&esw->offloads.encap_tbl_lock);
893
894 return err;
895
896 out_err:
897 mutex_unlock(&esw->offloads.encap_tbl_lock);
898 if (e)
899 mlx5e_encap_put(priv, e);
900 return err;
901
902 out_err_init:
903 mutex_unlock(&esw->offloads.encap_tbl_lock);
904 kfree(tun_info);
905 kfree(e);
906 return err;
907 }
908
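/* Set up the L3-to-L2 packet reformat used to restore the inner Ethernet
 * header on decap. Reformat contexts are shared between flows with the same
 * rebuilt header through the eswitch decap table.
 */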
909 int mlx5e_attach_decap(struct mlx5e_priv *priv,
910 struct mlx5e_tc_flow *flow,
911 struct netlink_ext_ack *extack)
912 {
913 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
914 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
915 struct mlx5_pkt_reformat_params reformat_params;
916 struct mlx5e_decap_entry *d;
917 struct mlx5e_decap_key key;
918 uintptr_t hash_key;
919 int err = 0;
920
921 if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
922 NL_SET_ERR_MSG_MOD(extack,
923 "encap header larger than max supported");
924 return -EOPNOTSUPP;
925 }
926
927 key.key = attr->eth;
928 hash_key = hash_decap_info(&key);
929 mutex_lock(&esw->offloads.decap_tbl_lock);
930 d = mlx5e_decap_get(priv, &key, hash_key);
931 if (d) {
932 mutex_unlock(&esw->offloads.decap_tbl_lock);
933 wait_for_completion(&d->res_ready);
934 mutex_lock(&esw->offloads.decap_tbl_lock);
935 if (d->compl_result) {
936 err = -EREMOTEIO;
937 goto out_free;
938 }
939 goto found;
940 }
941
942 d = kzalloc(sizeof(*d), GFP_KERNEL);
943 if (!d) {
944 err = -ENOMEM;
945 goto out_err;
946 }
947
948 d->key = key;
949 refcount_set(&d->refcnt, 1);
950 init_completion(&d->res_ready);
951 INIT_LIST_HEAD(&d->flows);
952 hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
953 mutex_unlock(&esw->offloads.decap_tbl_lock);
954
955 memset(&reformat_params, 0, sizeof(reformat_params));
956 reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
957 reformat_params.size = sizeof(attr->eth);
958 reformat_params.data = &attr->eth;
959 d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
960 &reformat_params,
961 MLX5_FLOW_NAMESPACE_FDB);
962 if (IS_ERR(d->pkt_reformat)) {
963 err = PTR_ERR(d->pkt_reformat);
964 d->compl_result = err;
965 }
966 mutex_lock(&esw->offloads.decap_tbl_lock);
967 complete_all(&d->res_ready);
968 if (err)
969 goto out_free;
970
971 found:
972 flow->decap_reformat = d;
973 attr->decap_pkt_reformat = d->pkt_reformat;
974 list_add(&flow->l3_to_l2_reformat, &d->flows);
975 mutex_unlock(&esw->offloads.decap_tbl_lock);
976 return 0;
977
978 out_free:
979 mutex_unlock(&esw->offloads.decap_tbl_lock);
980 mlx5e_decap_put(priv, d);
981 return err;
982
983 out_err:
984 mutex_unlock(&esw->offloads.decap_tbl_lock);
985 return err;
986 }
987
988 static int cmp_route_info(struct mlx5e_route_key *a,
989 struct mlx5e_route_key *b)
990 {
991 if (a->ip_version == 4 && b->ip_version == 4)
992 return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
993 sizeof(a->endpoint_ip.v4));
994 else if (a->ip_version == 6 && b->ip_version == 6)
995 return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
996 sizeof(a->endpoint_ip.v6));
997 return 1;
998 }
999
1000 static u32 hash_route_info(struct mlx5e_route_key *key)
1001 {
1002 if (key->ip_version == 4)
1003 return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1004 return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1005 }
1006
1007 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1008 struct mlx5e_route_entry *r)
1009 {
1010 WARN_ON(!list_empty(&r->decap_flows));
1011 WARN_ON(!list_empty(&r->encap_entries));
1012
1013 kfree_rcu(r, rcu);
1014 }
1015
1016 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1017 {
1018 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1019
1020 if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1021 return;
1022
1023 hash_del_rcu(&r->hlist);
1024 mutex_unlock(&esw->offloads.encap_tbl_lock);
1025
1026 mlx5e_route_dealloc(priv, r);
1027 }
1028
1029 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1030 {
1031 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1032
1033 lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1034
1035 if (!refcount_dec_and_test(&r->refcnt))
1036 return;
1037 hash_del_rcu(&r->hlist);
1038 mlx5e_route_dealloc(priv, r);
1039 }
1040
1041 static struct mlx5e_route_entry *
1042 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1043 u32 hash_key)
1044 {
1045 struct mlx5e_route_key r_key;
1046 struct mlx5e_route_entry *r;
1047
1048 hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1049 r_key = r->key;
1050 if (!cmp_route_info(&r_key, key) &&
1051 refcount_inc_not_zero(&r->refcnt))
1052 return r;
1053 }
1054 return NULL;
1055 }
1056
1057 static struct mlx5e_route_entry *
1058 mlx5e_route_get_create(struct mlx5e_priv *priv,
1059 struct mlx5e_route_key *key,
1060 int tunnel_dev_index,
1061 unsigned long *route_tbl_change_time)
1062 {
1063 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1064 struct mlx5_rep_uplink_priv *uplink_priv;
1065 struct mlx5e_rep_priv *uplink_rpriv;
1066 struct mlx5e_tc_tun_encap *encap;
1067 struct mlx5e_route_entry *r;
1068 u32 hash_key;
1069
1070 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1071 uplink_priv = &uplink_rpriv->uplink_priv;
1072 encap = uplink_priv->encap;
1073
1074 hash_key = hash_route_info(key);
1075 spin_lock_bh(&encap->route_lock);
1076 r = mlx5e_route_get(encap, key, hash_key);
1077 spin_unlock_bh(&encap->route_lock);
1078 if (r) {
1079 if (!mlx5e_route_entry_valid(r)) {
1080 mlx5e_route_put_locked(priv, r);
1081 return ERR_PTR(-EINVAL);
1082 }
1083 return r;
1084 }
1085
1086 r = kzalloc(sizeof(*r), GFP_KERNEL);
1087 if (!r)
1088 return ERR_PTR(-ENOMEM);
1089
1090 r->key = *key;
1091 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1092 r->tunnel_dev_index = tunnel_dev_index;
1093 refcount_set(&r->refcnt, 1);
1094 INIT_LIST_HEAD(&r->decap_flows);
1095 INIT_LIST_HEAD(&r->encap_entries);
1096
1097 spin_lock_bh(&encap->route_lock);
1098 *route_tbl_change_time = encap->route_tbl_last_update;
1099 hash_add(encap->route_tbl, &r->hlist, hash_key);
1100 spin_unlock_bh(&encap->route_lock);
1101
1102 return r;
1103 }
1104
1105 static struct mlx5e_route_entry *
1106 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1107 {
1108 u32 hash_key = hash_route_info(key);
1109 struct mlx5e_route_entry *r;
1110
1111 spin_lock_bh(&encap->route_lock);
1112 encap->route_tbl_last_update = jiffies;
1113 r = mlx5e_route_get(encap, key, hash_key);
1114 spin_unlock_bh(&encap->route_lock);
1115
1116 return r;
1117 }
1118
1119 struct mlx5e_tc_fib_event_data {
1120 struct work_struct work;
1121 unsigned long event;
1122 struct mlx5e_route_entry *r;
1123 struct net_device *ul_dev;
1124 };
1125
1126 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1127 static struct mlx5e_tc_fib_event_data *
1128 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1129 {
1130 struct mlx5e_tc_fib_event_data *fib_work;
1131
1132 fib_work = kzalloc(sizeof(*fib_work), flags);
1133 if (WARN_ON(!fib_work))
1134 return NULL;
1135
1136 INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1137 fib_work->event = event;
1138 fib_work->ul_dev = ul_dev;
1139
1140 return fib_work;
1141 }
1142
1143 static int
1144 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1145 struct mlx5e_route_entry *r,
1146 unsigned long event)
1147 {
1148 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1149 struct mlx5e_tc_fib_event_data *fib_work;
1150 struct mlx5e_rep_priv *uplink_rpriv;
1151 struct net_device *ul_dev;
1152
1153 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1154 ul_dev = uplink_rpriv->netdev;
1155
1156 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1157 if (!fib_work)
1158 return -ENOMEM;
1159
1160 dev_hold(ul_dev);
1161 refcount_inc(&r->refcnt);
1162 fib_work->r = r;
1163 queue_work(priv->wq, &fib_work->work);
1164
1165 return 0;
1166 }
1167
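/* Resolve the decap route for a tunnel RX flow and attach the flow to the
 * matching route entry so that FIB events can update it. If the route table
 * changed while the entry was being created, an update work item is scheduled
 * to avoid missing the event.
 */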
1168 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1169 struct mlx5e_tc_flow *flow)
1170 {
1171 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1172 unsigned long tbl_time_before, tbl_time_after;
1173 struct mlx5e_tc_flow_parse_attr *parse_attr;
1174 struct mlx5_flow_attr *attr = flow->attr;
1175 struct mlx5_esw_flow_attr *esw_attr;
1176 struct mlx5e_route_entry *r;
1177 struct mlx5e_route_key key;
1178 int err = 0;
1179
1180 esw_attr = attr->esw_attr;
1181 parse_attr = attr->parse_attr;
1182 mutex_lock(&esw->offloads.encap_tbl_lock);
1183 if (!esw_attr->rx_tun_attr)
1184 goto out;
1185
1186 tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1187 tbl_time_after = tbl_time_before;
1188 err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1189 if (err || !esw_attr->rx_tun_attr->decap_vport)
1190 goto out;
1191
1192 key.ip_version = attr->tun_ip_version;
1193 if (key.ip_version == 4)
1194 key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1195 else
1196 key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1197
1198 r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1199 &tbl_time_after);
1200 if (IS_ERR(r)) {
1201 err = PTR_ERR(r);
1202 goto out;
1203 }
1204 /* Routing changed concurrently. FIB event handler might have missed new
1205 * entry, schedule update.
1206 */
1207 if (tbl_time_before != tbl_time_after) {
1208 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1209 if (err) {
1210 mlx5e_route_put_locked(priv, r);
1211 goto out;
1212 }
1213 }
1214
1215 flow->decap_route = r;
1216 list_add(&flow->decap_routes, &r->decap_flows);
1217 mutex_unlock(&esw->offloads.encap_tbl_lock);
1218 return 0;
1219
1220 out:
1221 mutex_unlock(&esw->offloads.encap_tbl_lock);
1222 return err;
1223 }
1224
1225 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1226 struct mlx5e_tc_flow *flow,
1227 struct mlx5_flow_attr *attr,
1228 struct mlx5e_encap_entry *e,
1229 bool new_encap_entry,
1230 unsigned long tbl_time_before,
1231 int out_index)
1232 {
1233 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1234 unsigned long tbl_time_after = tbl_time_before;
1235 struct mlx5e_tc_flow_parse_attr *parse_attr;
1236 const struct ip_tunnel_info *tun_info;
1237 struct mlx5_esw_flow_attr *esw_attr;
1238 struct mlx5e_route_entry *r;
1239 struct mlx5e_route_key key;
1240 unsigned short family;
1241 int err = 0;
1242
1243 esw_attr = attr->esw_attr;
1244 parse_attr = attr->parse_attr;
1245 tun_info = parse_attr->tun_info[out_index];
1246 family = ip_tunnel_info_af(tun_info);
1247
1248 if (family == AF_INET) {
1249 key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1250 key.ip_version = 4;
1251 } else if (family == AF_INET6) {
1252 key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1253 key.ip_version = 6;
1254 }
1255
1256 err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1257 e->route_dev_ifindex, out_index);
1258 if (err || !(esw_attr->dests[out_index].flags &
1259 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1260 return err;
1261
1262 r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1263 &tbl_time_after);
1264 if (IS_ERR(r))
1265 return PTR_ERR(r);
1266 /* Routing changed concurrently. FIB event handler might have missed new
1267 * entry, schedule update.
1268 */
1269 if (tbl_time_before != tbl_time_after) {
1270 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1271 if (err) {
1272 mlx5e_route_put_locked(priv, r);
1273 return err;
1274 }
1275 }
1276
1277 flow->encap_routes[out_index].r = r;
1278 if (new_encap_entry)
1279 list_add(&e->route_list, &r->encap_entries);
1280 flow->encap_routes[out_index].index = out_index;
1281 return 0;
1282 }
1283
1284 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1285 struct mlx5e_tc_flow *flow)
1286 {
1287 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1288 struct mlx5e_route_entry *r = flow->decap_route;
1289
1290 if (!r)
1291 return;
1292
1293 mutex_lock(&esw->offloads.encap_tbl_lock);
1294 list_del(&flow->decap_routes);
1295 flow->decap_route = NULL;
1296
1297 if (!refcount_dec_and_test(&r->refcnt)) {
1298 mutex_unlock(&esw->offloads.encap_tbl_lock);
1299 return;
1300 }
1301 hash_del_rcu(&r->hlist);
1302 mutex_unlock(&esw->offloads.encap_tbl_lock);
1303
1304 mlx5e_route_dealloc(priv, r);
1305 }
1306
1307 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1308 struct mlx5e_tc_flow *flow,
1309 int out_index)
1310 {
1311 struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1312 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1313 struct mlx5e_encap_entry *e, *tmp;
1314
1315 if (!r)
1316 return;
1317
1318 mutex_lock(&esw->offloads.encap_tbl_lock);
1319 flow->encap_routes[out_index].r = NULL;
1320
1321 if (!refcount_dec_and_test(&r->refcnt)) {
1322 mutex_unlock(&esw->offloads.encap_tbl_lock);
1323 return;
1324 }
1325 list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1326 list_del_init(&e->route_list);
1327 hash_del_rcu(&r->hlist);
1328 mutex_unlock(&esw->offloads.encap_tbl_lock);
1329
1330 mlx5e_route_dealloc(priv, r);
1331 }
1332
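/* A route change made this encap entry unusable: unoffload all of its flows
 * (slow path or FDB rules), drop their mod_hdr and reformat references and
 * mark the entry with MLX5_ENCAP_ENTRY_NO_ROUTE.
 */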
1333 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1334 struct mlx5e_encap_entry *e,
1335 struct list_head *encap_flows)
1336 {
1337 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1338 struct mlx5e_tc_flow *flow;
1339
1340 list_for_each_entry(flow, encap_flows, tmp_list) {
1341 struct mlx5_flow_attr *attr = flow->attr;
1342 struct mlx5_esw_flow_attr *esw_attr;
1343
1344 if (!mlx5e_is_offloaded_flow(flow))
1345 continue;
1346 esw_attr = attr->esw_attr;
1347
1348 if (flow_flag_test(flow, SLOW))
1349 mlx5e_tc_unoffload_from_slow_path(esw, flow);
1350 else
1351 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1352 mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
1353 attr->modify_hdr = NULL;
1354
1355 esw_attr->dests[flow->tmp_entry_index].flags &=
1356 ~MLX5_ESW_DEST_ENCAP_VALID;
1357 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1358 }
1359
1360 e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1361 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1362 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1363 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1364 e->pkt_reformat = NULL;
1365 }
1366 }
1367
1368 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1369 struct net_device *tunnel_dev,
1370 struct mlx5e_encap_entry *e,
1371 struct list_head *encap_flows)
1372 {
1373 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1374 struct mlx5e_tc_flow *flow;
1375 int err;
1376
1377 err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1378 mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1379 mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1380 if (err)
1381 mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1382 e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1383
1384 list_for_each_entry(flow, encap_flows, tmp_list) {
1385 struct mlx5e_tc_flow_parse_attr *parse_attr;
1386 struct mlx5_esw_flow_attr *esw_attr;
1387 struct mlx5_flow_handle *rule;
1388 struct mlx5_flow_attr *attr;
1389 struct mlx5_flow_spec *spec;
1390
1391 if (flow_flag_test(flow, FAILED))
1392 continue;
1393
1394 spec = &flow->attr->parse_attr->spec;
1395
1396 attr = mlx5e_tc_get_encap_attr(flow);
1397 esw_attr = attr->esw_attr;
1398 parse_attr = attr->parse_attr;
1399
1400 err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1401 e->out_dev, e->route_dev_ifindex,
1402 flow->tmp_entry_index);
1403 if (err) {
1404 mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1405 continue;
1406 }
1407
1408 err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
1409 if (err) {
1410 mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1411 err);
1412 continue;
1413 }
1414
1415 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1416 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1417 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1418 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1419 goto offload_to_slow_path;
1420
1421 err = mlx5e_tc_offload_flow_post_acts(flow);
1422 if (err) {
1423 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1424 err);
1425 goto offload_to_slow_path;
1426 }
1427
1428 /* update from slow path rule to encap rule */
1429 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1430 if (IS_ERR(rule)) {
1431 mlx5e_tc_unoffload_flow_post_acts(flow);
1432 err = PTR_ERR(rule);
1433 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1434 err);
1435 } else {
1436 flow->rule[0] = rule;
1437 }
1438 } else {
1439 offload_to_slow_path:
1440 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1441 /* mark the flow's encap dest as non-valid */
1442 esw_attr->dests[flow->tmp_entry_index].flags &=
1443 ~MLX5_ESW_DEST_ENCAP_VALID;
1444
1445 if (IS_ERR(rule)) {
1446 err = PTR_ERR(rule);
1447 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1448 err);
1449 } else {
1450 flow->rule[0] = rule;
1451 }
1452 }
1453 flow_flag_set(flow, OFFLOADED);
1454 }
1455 }
1456
1457 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1458 struct mlx5e_route_entry *r,
1459 struct list_head *flow_list,
1460 bool replace)
1461 {
1462 struct net_device *tunnel_dev;
1463 struct mlx5e_encap_entry *e;
1464
1465 tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1466 if (!tunnel_dev)
1467 return -ENODEV;
1468
1469 list_for_each_entry(e, &r->encap_entries, route_list) {
1470 LIST_HEAD(encap_flows);
1471
1472 mlx5e_take_all_encap_flows(e, &encap_flows);
1473 if (list_empty(&encap_flows))
1474 continue;
1475
1476 if (mlx5e_route_entry_valid(r))
1477 mlx5e_invalidate_encap(priv, e, &encap_flows);
1478
1479 if (!replace) {
1480 list_splice(&encap_flows, flow_list);
1481 continue;
1482 }
1483
1484 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1485 list_splice(&encap_flows, flow_list);
1486 }
1487
1488 return 0;
1489 }
1490
1491 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1492 struct list_head *flow_list)
1493 {
1494 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1495 struct mlx5e_tc_flow *flow;
1496
1497 list_for_each_entry(flow, flow_list, tmp_list)
1498 if (mlx5e_is_offloaded_flow(flow))
1499 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1500 }
1501
1502 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1503 struct list_head *decap_flows)
1504 {
1505 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1506 struct mlx5e_tc_flow *flow;
1507
1508 list_for_each_entry(flow, decap_flows, tmp_list) {
1509 struct mlx5e_tc_flow_parse_attr *parse_attr;
1510 struct mlx5_flow_attr *attr = flow->attr;
1511 struct mlx5_flow_handle *rule;
1512 struct mlx5_flow_spec *spec;
1513 int err;
1514
1515 if (flow_flag_test(flow, FAILED))
1516 continue;
1517
1518 parse_attr = attr->parse_attr;
1519 spec = &parse_attr->spec;
1520 err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1521 if (err) {
1522 mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1523 err);
1524 continue;
1525 }
1526
1527 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1528 if (IS_ERR(rule)) {
1529 err = PTR_ERR(rule);
1530 mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1531 err);
1532 } else {
1533 flow->rule[0] = rule;
1534 flow_flag_set(flow, OFFLOADED);
1535 }
1536 }
1537 }
1538
1539 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1540 struct mlx5e_route_entry *r,
1541 struct list_head *flow_list,
1542 bool replace)
1543 {
1544 struct net_device *tunnel_dev;
1545 LIST_HEAD(decap_flows);
1546
1547 tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1548 if (!tunnel_dev)
1549 return -ENODEV;
1550
1551 mlx5e_take_all_route_decap_flows(r, &decap_flows);
1552 if (mlx5e_route_entry_valid(r))
1553 mlx5e_unoffload_flow_list(priv, &decap_flows);
1554 if (replace)
1555 mlx5e_reoffload_decap(priv, &decap_flows);
1556
1557 list_splice(&decap_flows, flow_list);
1558
1559 return 0;
1560 }
1561
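/* Work handler for FIB events targeting a tracked route entry. Under rtnl and
 * the encap table lock it re-offloads (on route replace) or unoffloads (on
 * route delete) all encap entries and decap flows attached to the route.
 */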
1562 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1563 {
1564 struct mlx5e_tc_fib_event_data *event_data =
1565 container_of(work, struct mlx5e_tc_fib_event_data, work);
1566 struct net_device *ul_dev = event_data->ul_dev;
1567 struct mlx5e_priv *priv = netdev_priv(ul_dev);
1568 struct mlx5e_route_entry *r = event_data->r;
1569 struct mlx5_eswitch *esw;
1570 LIST_HEAD(flow_list);
1571 bool replace;
1572 int err;
1573
1574 /* sync with concurrent neigh updates */
1575 rtnl_lock();
1576 esw = priv->mdev->priv.eswitch;
1577 mutex_lock(&esw->offloads.encap_tbl_lock);
1578 replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1579
1580 if (!mlx5e_route_entry_valid(r) && !replace)
1581 goto out;
1582
1583 err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1584 if (err)
1585 mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1586 err);
1587
1588 err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1589 if (err)
1590 mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1591 err);
1592
1593 if (replace)
1594 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1595 out:
1596 mutex_unlock(&esw->offloads.encap_tbl_lock);
1597 rtnl_unlock();
1598
1599 mlx5e_put_flow_list(priv, &flow_list);
1600 mlx5e_route_put(priv, event_data->r);
1601 dev_put(event_data->ul_dev);
1602 kfree(event_data);
1603 }
1604
1605 static struct mlx5e_tc_fib_event_data *
1606 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1607 struct net_device *ul_dev,
1608 struct mlx5e_tc_tun_encap *encap,
1609 unsigned long event,
1610 struct fib_notifier_info *info)
1611 {
1612 struct fib_entry_notifier_info *fen_info;
1613 struct mlx5e_tc_fib_event_data *fib_work;
1614 struct mlx5e_route_entry *r;
1615 struct mlx5e_route_key key;
1616 struct net_device *fib_dev;
1617
1618 fen_info = container_of(info, struct fib_entry_notifier_info, info);
1619 if (fen_info->fi->nh)
1620 return NULL;
1621 fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1622 if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1623 fen_info->dst_len != 32)
1624 return NULL;
1625
1626 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1627 if (!fib_work)
1628 return ERR_PTR(-ENOMEM);
1629
1630 key.endpoint_ip.v4 = htonl(fen_info->dst);
1631 key.ip_version = 4;
1632
1633 /* Must not fail after this point: releasing the reference to r
1634 * requires taking a sleeping mutex, which we can't do in atomic
1635 * context.
1636 */
1637 r = mlx5e_route_lookup_for_update(encap, &key);
1638 if (!r)
1639 goto out;
1640 fib_work->r = r;
1641 dev_hold(ul_dev);
1642
1643 return fib_work;
1644
1645 out:
1646 kfree(fib_work);
1647 return NULL;
1648 }
1649
1650 static struct mlx5e_tc_fib_event_data *
1651 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1652 struct net_device *ul_dev,
1653 struct mlx5e_tc_tun_encap *encap,
1654 unsigned long event,
1655 struct fib_notifier_info *info)
1656 {
1657 struct fib6_entry_notifier_info *fen_info;
1658 struct mlx5e_tc_fib_event_data *fib_work;
1659 struct mlx5e_route_entry *r;
1660 struct mlx5e_route_key key;
1661 struct net_device *fib_dev;
1662
1663 fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1664 fib_dev = fib6_info_nh_dev(fen_info->rt);
1665 if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1666 fen_info->rt->fib6_dst.plen != 128)
1667 return NULL;
1668
1669 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1670 if (!fib_work)
1671 return ERR_PTR(-ENOMEM);
1672
1673 memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1674 sizeof(fen_info->rt->fib6_dst.addr));
1675 key.ip_version = 6;
1676
1677 /* Must not fail after this point: releasing the reference to r
1678 * requires taking a sleeping mutex, which we can't do in atomic
1679 * context.
1680 */
1681 r = mlx5e_route_lookup_for_update(encap, &key);
1682 if (!r)
1683 goto out;
1684 fib_work->r = r;
1685 dev_hold(ul_dev);
1686
1687 return fib_work;
1688
1689 out:
1690 kfree(fib_work);
1691 return NULL;
1692 }
1693
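/* FIB notifier callback; runs in atomic context. Only exact host routes
 * (/32 or /128) on mlx5 netdevices are of interest; for those, a work item
 * carrying a reference to the matching route entry is queued for processing.
 */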
1694 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1695 {
1696 struct mlx5e_tc_fib_event_data *fib_work;
1697 struct fib_notifier_info *info = ptr;
1698 struct mlx5e_tc_tun_encap *encap;
1699 struct net_device *ul_dev;
1700 struct mlx5e_priv *priv;
1701
1702 encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1703 priv = encap->priv;
1704 ul_dev = priv->netdev;
1705 priv = netdev_priv(ul_dev);
1706
1707 switch (event) {
1708 case FIB_EVENT_ENTRY_REPLACE:
1709 case FIB_EVENT_ENTRY_DEL:
1710 if (info->family == AF_INET)
1711 fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1712 else if (info->family == AF_INET6)
1713 fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1714 else
1715 return NOTIFY_DONE;
1716
1717 if (!IS_ERR_OR_NULL(fib_work)) {
1718 queue_work(priv->wq, &fib_work->work);
1719 } else if (IS_ERR(fib_work)) {
1720 NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1721 mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1722 PTR_ERR(fib_work));
1723 }
1724
1725 break;
1726 default:
1727 return NOTIFY_DONE;
1728 }
1729
1730 return NOTIFY_DONE;
1731 }
1732
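/* Allocate the per-uplink tunnel encap context, initialize the route table
 * and register the FIB notifier. Returns an ERR_PTR on failure; the context
 * is released by mlx5e_tc_tun_cleanup().
 */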
1733 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1734 {
1735 struct mlx5e_tc_tun_encap *encap;
1736 int err;
1737
1738 encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1739 if (!encap)
1740 return ERR_PTR(-ENOMEM);
1741
1742 encap->priv = priv;
1743 encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1744 spin_lock_init(&encap->route_lock);
1745 hash_init(encap->route_tbl);
1746 err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1747 NULL, NULL);
1748 if (err) {
1749 kvfree(encap);
1750 return ERR_PTR(err);
1751 }
1752
1753 return encap;
1754 }
1755
1756 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1757 {
1758 if (!encap)
1759 return;
1760
1761 unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1762 flush_workqueue(encap->priv->wq); /* flush fib event works */
1763 kvfree(encap);
1764 }
1765