1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include <net/ip_tunnels.h>
7 #include "tc_tun_encap.h"
8 #include "en_tc.h"
9 #include "tc_tun.h"
10 #include "rep/tc.h"
11 #include "diag/en_tc_tracepoint.h"
12
13 enum {
14 MLX5E_ROUTE_ENTRY_VALID = BIT(0),
15 };
16
17 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
18 struct mlx5_flow_attr *attr,
19 struct mlx5e_encap_entry *e,
20 int out_index)
21 {
22 struct net_device *route_dev;
23 int err = 0;
24
25 route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
26
27 if (!route_dev || !netif_is_ovs_master(route_dev) ||
28 attr->parse_attr->filter_dev == e->out_dev)
29 goto out;
30
31 err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
32 MLX5E_TC_INT_PORT_EGRESS,
33 &attr->action, out_index);
34
35 out:
36 if (route_dev)
37 dev_put(route_dev);
38
39 return err;
40 }
41
42 struct mlx5e_route_key {
43 int ip_version;
44 union {
45 __be32 v4;
46 struct in6_addr v6;
47 } endpoint_ip;
48 };
49
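/* Cached route entry, keyed by the tunnel endpoint IP. It tracks the encap
 * entries and decap flows that depend on this route so they can be updated
 * when a FIB event changes the routing decision.
 */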
50 struct mlx5e_route_entry {
51 struct mlx5e_route_key key;
52 struct list_head encap_entries;
53 struct list_head decap_flows;
54 u32 flags;
55 struct hlist_node hlist;
56 refcount_t refcnt;
57 int tunnel_dev_index;
58 struct rcu_head rcu;
59 };
60
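/* Per-uplink tunnel offload context: the FIB notifier block and the hash
 * table of cached route entries, protected by route_lock.
 */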
61 struct mlx5e_tc_tun_encap {
62 struct mlx5e_priv *priv;
63 struct notifier_block fib_nb;
64 spinlock_t route_lock; /* protects route_tbl */
65 unsigned long route_tbl_last_update;
66 DECLARE_HASHTABLE(route_tbl, 8);
67 };
68
69 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
70 {
71 return r->flags & MLX5E_ROUTE_ENTRY_VALID;
72 }
73
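/* Copy the outer source/destination IP addresses from the match spec into
 * the flow's rx_tun_attr. The TUN_RX flag is set only when both addresses
 * are present, since they are needed for the route lookup later on.
 */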
74 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
75 struct mlx5_flow_spec *spec)
76 {
77 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
78 struct mlx5_rx_tun_attr *tun_attr;
79 void *daddr, *saddr;
80 u8 ip_version;
81
82 tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
83 if (!tun_attr)
84 return -ENOMEM;
85
86 esw_attr->rx_tun_attr = tun_attr;
87 ip_version = mlx5e_tc_get_ip_version(spec, true);
88
89 if (ip_version == 4) {
90 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
91 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
92 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
93 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
94 tun_attr->dst_ip.v4 = *(__be32 *)daddr;
95 tun_attr->src_ip.v4 = *(__be32 *)saddr;
96 if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
97 return 0;
98 }
99 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
100 else if (ip_version == 6) {
101 int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
102
103 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
104 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
105 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
106 outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
107 memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
108 memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
109 if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
110 ipv6_addr_any(&tun_attr->src_ip.v6))
111 return 0;
112 }
113 #endif
114 /* Only set the flag if both src and dst ip addresses exist. They are
115 * required to establish routing.
116 */
117 flow_flag_set(flow, TUN_RX);
118 flow->attr->tun_ip_version = ip_version;
119 return 0;
120 }
121
122 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
123 {
124 bool all_flow_encaps_valid = true;
125 int i;
126
127 /* Flow can be associated with multiple encap entries.
128 * Before offloading the flow verify that all of them have
129 * a valid neighbour.
130 */
131 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
132 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
133 continue;
134 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
135 all_flow_encaps_valid = false;
136 break;
137 }
138 }
139
140 return all_flow_encaps_valid;
141 }
142
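/* Called once the encap's neighbour is resolved: allocate the packet
 * reformat object and move attached flows from the slow path rule back to
 * their encap (fast path) FDB rule.
 */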
143 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
144 struct mlx5e_encap_entry *e,
145 struct list_head *flow_list)
146 {
147 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
148 struct mlx5_pkt_reformat_params reformat_params;
149 struct mlx5_esw_flow_attr *esw_attr;
150 struct mlx5_flow_handle *rule;
151 struct mlx5_flow_attr *attr;
152 struct mlx5_flow_spec *spec;
153 struct mlx5e_tc_flow *flow;
154 int err;
155
156 if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
157 return;
158
159 memset(&reformat_params, 0, sizeof(reformat_params));
160 reformat_params.type = e->reformat_type;
161 reformat_params.size = e->encap_size;
162 reformat_params.data = e->encap_header;
163 e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
164 &reformat_params,
165 MLX5_FLOW_NAMESPACE_FDB);
166 if (IS_ERR(e->pkt_reformat)) {
167 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
168 PTR_ERR(e->pkt_reformat));
169 return;
170 }
171 e->flags |= MLX5_ENCAP_ENTRY_VALID;
172 mlx5e_rep_queue_neigh_stats_work(priv);
173
174 list_for_each_entry(flow, flow_list, tmp_list) {
175 if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
176 continue;
177
178 spec = &flow->attr->parse_attr->spec;
179
180 attr = mlx5e_tc_get_encap_attr(flow);
181 esw_attr = attr->esw_attr;
182 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
183 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
184
185 /* Do not offload flows with unresolved neighbors */
186 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
187 continue;
188
189 err = mlx5e_tc_offload_flow_post_acts(flow);
190 if (err) {
191 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
192 err);
193 continue;
194 }
195
196 /* update from slow path rule to encap rule */
197 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
198 if (IS_ERR(rule)) {
199 mlx5e_tc_unoffload_flow_post_acts(flow);
200 err = PTR_ERR(rule);
201 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
202 err);
203 continue;
204 }
205
206 mlx5e_tc_unoffload_from_slow_path(esw, flow);
207 flow->rule[0] = rule;
208 /* was unset when slow path rule removed */
209 flow_flag_set(flow, OFFLOADED);
210 }
211 }
212
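/* Called when the encap's neighbour becomes invalid: demote attached flows
 * to the slow path rule and release the packet reformat object.
 */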
213 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
214 struct mlx5e_encap_entry *e,
215 struct list_head *flow_list)
216 {
217 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
218 struct mlx5_esw_flow_attr *esw_attr;
219 struct mlx5_flow_handle *rule;
220 struct mlx5_flow_attr *attr;
221 struct mlx5_flow_spec *spec;
222 struct mlx5e_tc_flow *flow;
223 int err;
224
225 list_for_each_entry(flow, flow_list, tmp_list) {
226 if (!mlx5e_is_offloaded_flow(flow))
227 continue;
228
229 attr = mlx5e_tc_get_encap_attr(flow);
230 esw_attr = attr->esw_attr;
231 /* mark the flow's encap dest as non-valid */
232 esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
233 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
234
235 /* Clear pkt_reformat before checking the slow path flag. The same
236 * flow may already have the slow path flag set from a previous
237 * iteration, but its pkt_reformat still needs to be cleared.
238 */
239 if (flow_flag_test(flow, SLOW))
240 continue;
241
242 /* update from encap rule to slow path rule */
243 spec = &flow->attr->parse_attr->spec;
244 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
245
246 if (IS_ERR(rule)) {
247 err = PTR_ERR(rule);
248 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
249 err);
250 continue;
251 }
252
253 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
254 mlx5e_tc_unoffload_flow_post_acts(flow);
255 flow->rule[0] = rule;
256 /* was unset when fast path rule removed */
257 flow_flag_set(flow, OFFLOADED);
258 }
259
260 /* the encap entry was valid here, so release its packet reformat */
261 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
262 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
263 e->pkt_reformat = NULL;
264 }
265
266 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
267 struct list_head *flow_list,
268 int index)
269 {
270 if (IS_ERR(mlx5e_flow_get(flow))) {
271 /* Flow is being deleted concurrently. Wait for it to be
272 * unoffloaded from hardware, otherwise deleting encap will
273 * fail.
274 */
275 wait_for_completion(&flow->del_hw_done);
276 return;
277 }
278 wait_for_completion(&flow->init_done);
279
280 flow->tmp_entry_index = index;
281 list_add(&flow->tmp_list, flow_list);
282 }
283
284 /* Takes reference to all flows attached to encap and adds the flows to
285 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
286 */
287 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
288 {
289 struct encap_flow_item *efi;
290 struct mlx5e_tc_flow *flow;
291
292 list_for_each_entry(efi, &e->flows, list) {
293 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
294 mlx5e_take_tmp_flow(flow, flow_list, efi->index);
295 }
296 }
297
298 /* Takes reference to all flows attached to route and adds the flows to
299 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
300 */
301 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
302 struct list_head *flow_list)
303 {
304 struct mlx5e_tc_flow *flow;
305
306 list_for_each_entry(flow, &r->decap_flows, decap_routes)
307 mlx5e_take_tmp_flow(flow, flow_list, 0);
308 }
309
310 typedef bool (match_cb)(struct mlx5e_encap_entry *);
311
312 static struct mlx5e_encap_entry *
313 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
314 struct mlx5e_encap_entry *e,
315 match_cb match)
316 {
317 struct mlx5e_encap_entry *next = NULL;
318
319 retry:
320 rcu_read_lock();
321
322 /* find encap with non-zero reference counter value */
323 for (next = e ?
324 list_next_or_null_rcu(&nhe->encap_list,
325 &e->encap_list,
326 struct mlx5e_encap_entry,
327 encap_list) :
328 list_first_or_null_rcu(&nhe->encap_list,
329 struct mlx5e_encap_entry,
330 encap_list);
331 next;
332 next = list_next_or_null_rcu(&nhe->encap_list,
333 &next->encap_list,
334 struct mlx5e_encap_entry,
335 encap_list))
336 if (mlx5e_encap_take(next))
337 break;
338
339 rcu_read_unlock();
340
341 /* release starting encap */
342 if (e)
343 mlx5e_encap_put(netdev_priv(e->out_dev), e);
344 if (!next)
345 return next;
346
347 /* wait for encap to be fully initialized */
348 wait_for_completion(&next->res_ready);
349 /* continue searching if encap entry is not in valid state after completion */
350 if (!match(next)) {
351 e = next;
352 goto retry;
353 }
354
355 return next;
356 }
357
358 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
359 {
360 return e->flags & MLX5_ENCAP_ENTRY_VALID;
361 }
362
363 static struct mlx5e_encap_entry *
364 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
365 struct mlx5e_encap_entry *e)
366 {
367 return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
368 }
369
370 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
371 {
372 return e->compl_result >= 0;
373 }
374
375 struct mlx5e_encap_entry *
376 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
377 struct mlx5e_encap_entry *e)
378 {
379 return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
380 }
381
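/* Query the counters of all flows using valid encaps on this neighbour; if
 * any flow saw traffic since the last report, send a neigh event so the
 * neighbour entry is kept alive.
 */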
382 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
383 {
384 struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
385 struct mlx5e_encap_entry *e = NULL;
386 struct mlx5e_tc_flow *flow;
387 struct mlx5_fc *counter;
388 struct neigh_table *tbl;
389 bool neigh_used = false;
390 struct neighbour *n;
391 u64 lastuse;
392
393 if (m_neigh->family == AF_INET)
394 tbl = &arp_tbl;
395 #if IS_ENABLED(CONFIG_IPV6)
396 else if (m_neigh->family == AF_INET6)
397 tbl = ipv6_stub->nd_tbl;
398 #endif
399 else
400 return;
401
402 /* mlx5e_get_next_valid_encap() releases previous encap before returning
403 * next one.
404 */
405 while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
406 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
407 struct encap_flow_item *efi, *tmp;
408 struct mlx5_eswitch *esw;
409 LIST_HEAD(flow_list);
410
411 esw = priv->mdev->priv.eswitch;
412 mutex_lock(&esw->offloads.encap_tbl_lock);
413 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
414 flow = container_of(efi, struct mlx5e_tc_flow,
415 encaps[efi->index]);
416 if (IS_ERR(mlx5e_flow_get(flow)))
417 continue;
418 list_add(&flow->tmp_list, &flow_list);
419
420 if (mlx5e_is_offloaded_flow(flow)) {
421 counter = mlx5e_tc_get_counter(flow);
422 lastuse = mlx5_fc_query_lastuse(counter);
423 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
424 neigh_used = true;
425 break;
426 }
427 }
428 }
429 mutex_unlock(&esw->offloads.encap_tbl_lock);
430
431 mlx5e_put_flow_list(priv, &flow_list);
432 if (neigh_used) {
433 /* release current encap before breaking the loop */
434 mlx5e_encap_put(priv, e);
435 break;
436 }
437 }
438
439 trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
440
441 if (neigh_used) {
442 nhe->reported_lastuse = jiffies;
443
444 /* find the relevant neigh according to the cached device and
445 * dst ip pair
446 */
447 n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
448 if (!n)
449 return;
450
451 neigh_event_send(n, NULL);
452 neigh_release(n);
453 }
454 }
455
456 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
457 {
458 WARN_ON(!list_empty(&e->flows));
459
460 if (e->compl_result > 0) {
461 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
462
463 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
464 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
465 }
466
467 kfree(e->tun_info);
468 kfree(e->encap_header);
469 kfree_rcu(e, rcu);
470 }
471
472 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
473 struct mlx5e_decap_entry *d)
474 {
475 WARN_ON(!list_empty(&d->flows));
476
477 if (!d->compl_result)
478 mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
479
480 kfree_rcu(d, rcu);
481 }
482
483 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
484 {
485 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
486
487 if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
488 return;
489 list_del(&e->route_list);
490 hash_del_rcu(&e->encap_hlist);
491 mutex_unlock(&esw->offloads.encap_tbl_lock);
492
493 mlx5e_encap_dealloc(priv, e);
494 }
495
496 static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
497 {
498 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
499
500 lockdep_assert_held(&esw->offloads.encap_tbl_lock);
501
502 if (!refcount_dec_and_test(&e->refcnt))
503 return;
504 list_del(&e->route_list);
505 hash_del_rcu(&e->encap_hlist);
506 mlx5e_encap_dealloc(priv, e);
507 }
508
509 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
510 {
511 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
512
513 if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
514 return;
515 hash_del_rcu(&d->hlist);
516 mutex_unlock(&esw->offloads.decap_tbl_lock);
517
518 mlx5e_decap_dealloc(priv, d);
519 }
520
521 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
522 struct mlx5e_tc_flow *flow,
523 int out_index);
524
525 void mlx5e_detach_encap(struct mlx5e_priv *priv,
526 struct mlx5e_tc_flow *flow,
527 struct mlx5_flow_attr *attr,
528 int out_index)
529 {
530 struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
531 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
532
533 if (!mlx5e_is_eswitch_flow(flow))
534 return;
535
536 if (attr->esw_attr->dests[out_index].flags &
537 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
538 mlx5e_detach_encap_route(priv, flow, out_index);
539
540 /* flow wasn't fully initialized */
541 if (!e)
542 return;
543
544 mutex_lock(&esw->offloads.encap_tbl_lock);
545 list_del(&flow->encaps[out_index].list);
546 flow->encaps[out_index].e = NULL;
547 if (!refcount_dec_and_test(&e->refcnt)) {
548 mutex_unlock(&esw->offloads.encap_tbl_lock);
549 return;
550 }
551 list_del(&e->route_list);
552 hash_del_rcu(&e->encap_hlist);
553 mutex_unlock(&esw->offloads.encap_tbl_lock);
554
555 mlx5e_encap_dealloc(priv, e);
556 }
557
558 void mlx5e_detach_decap(struct mlx5e_priv *priv,
559 struct mlx5e_tc_flow *flow)
560 {
561 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
562 struct mlx5e_decap_entry *d = flow->decap_reformat;
563
564 if (!d)
565 return;
566
567 mutex_lock(&esw->offloads.decap_tbl_lock);
568 list_del(&flow->l3_to_l2_reformat);
569 flow->decap_reformat = NULL;
570
571 if (!refcount_dec_and_test(&d->refcnt)) {
572 mutex_unlock(&esw->offloads.decap_tbl_lock);
573 return;
574 }
575 hash_del_rcu(&d->hlist);
576 mutex_unlock(&esw->offloads.decap_tbl_lock);
577
578 mlx5e_decap_dealloc(priv, d);
579 }
580
581 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
582 struct mlx5e_encap_key *b)
583 {
584 return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
585 a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
586 }
587
588 bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
589 struct mlx5e_encap_key *b,
590 __be16 tun_flags)
591 {
592 struct ip_tunnel_info *a_info;
593 struct ip_tunnel_info *b_info;
594 bool a_has_opts, b_has_opts;
595
596 if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
597 return false;
598
599 a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
600 b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);
601
602 /* keys are equal when both don't have any options attached */
603 if (!a_has_opts && !b_has_opts)
604 return true;
605
606 if (a_has_opts != b_has_opts)
607 return false;
608
609 /* options stored in memory next to ip_tunnel_info struct */
610 a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
611 b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
612
613 return a_info->options_len == b_info->options_len &&
614 !memcmp(ip_tunnel_info_opts(a_info),
615 ip_tunnel_info_opts(b_info),
616 a_info->options_len);
617 }
618
619 static int cmp_decap_info(struct mlx5e_decap_key *a,
620 struct mlx5e_decap_key *b)
621 {
622 return memcmp(&a->key, &b->key, sizeof(b->key));
623 }
624
625 static int hash_encap_info(struct mlx5e_encap_key *key)
626 {
627 return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
628 key->tc_tunnel->tunnel_type);
629 }
630
631 static int hash_decap_info(struct mlx5e_decap_key *key)
632 {
633 return jhash(&key->key, sizeof(key->key), 0);
634 }
635
636 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
637 {
638 return refcount_inc_not_zero(&e->refcnt);
639 }
640
641 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
642 {
643 return refcount_inc_not_zero(&e->refcnt);
644 }
645
646 static struct mlx5e_encap_entry *
647 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
648 uintptr_t hash_key)
649 {
650 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
651 struct mlx5e_encap_key e_key;
652 struct mlx5e_encap_entry *e;
653
654 hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
655 encap_hlist, hash_key) {
656 e_key.ip_tun_key = &e->tun_info->key;
657 e_key.tc_tunnel = e->tunnel;
658 if (e->tunnel->encap_info_equal(&e_key, key) &&
659 mlx5e_encap_take(e))
660 return e;
661 }
662
663 return NULL;
664 }
665
666 static struct mlx5e_decap_entry *
667 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
668 uintptr_t hash_key)
669 {
670 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
671 struct mlx5e_decap_key r_key;
672 struct mlx5e_decap_entry *e;
673
674 hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
675 hlist, hash_key) {
676 r_key = e->key;
677 if (!cmp_decap_info(&r_key, key) &&
678 mlx5e_decap_take(e))
679 return e;
680 }
681 return NULL;
682 }
683
684 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
685 {
686 size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
687
688 return kmemdup(tun_info, tun_size, GFP_KERNEL);
689 }
690
691 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
692 struct mlx5e_tc_flow *flow,
693 int out_index,
694 struct mlx5e_encap_entry *e,
695 struct netlink_ext_ack *extack)
696 {
697 int i;
698
699 for (i = 0; i < out_index; i++) {
700 if (flow->encaps[i].e != e)
701 continue;
702 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
703 netdev_err(priv->netdev, "can't duplicate encap action\n");
704 return true;
705 }
706
707 return false;
708 }
709
710 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
711 struct mlx5_flow_attr *attr,
712 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
713 struct net_device *out_dev,
714 int route_dev_ifindex,
715 int out_index)
716 {
717 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
718 struct net_device *route_dev;
719 u16 vport_num;
720 int err = 0;
721 u32 data;
722
723 route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
724
725 if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
726 !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
727 goto out;
728
729 err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
730 if (err)
731 goto out;
732
733 attr->dest_chain = 0;
734 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
735 esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
736 data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
737 vport_num);
738 err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
739 MLX5_FLOW_NAMESPACE_FDB,
740 VPORT_TO_REG, data);
741 if (err >= 0) {
742 esw_attr->dests[out_index].src_port_rewrite_act_id = err;
743 err = 0;
744 }
745
746 out:
747 if (route_dev)
748 dev_put(route_dev);
749 return err;
750 }
751
752 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
753 struct mlx5_esw_flow_attr *attr,
754 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
755 struct net_device *out_dev,
756 int route_dev_ifindex,
757 int out_index)
758 {
759 int act_id = attr->dests[out_index].src_port_rewrite_act_id;
760 struct net_device *route_dev;
761 u16 vport_num;
762 int err = 0;
763 u32 data;
764
765 route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
766
767 if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
768 !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
769 err = -ENODEV;
770 goto out;
771 }
772
773 err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
774 if (err)
775 goto out;
776
777 data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
778 vport_num);
779 mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
780
781 out:
782 if (route_dev)
783 dev_put(route_dev);
784 return err;
785 }
786
787 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
788 {
789 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
790 struct mlx5_rep_uplink_priv *uplink_priv;
791 struct mlx5e_rep_priv *uplink_rpriv;
792 struct mlx5e_tc_tun_encap *encap;
793 unsigned int ret;
794
795 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
796 uplink_priv = &uplink_rpriv->uplink_priv;
797 encap = uplink_priv->encap;
798
799 spin_lock_bh(&encap->route_lock);
800 ret = encap->route_tbl_last_update;
801 spin_unlock_bh(&encap->route_lock);
802 return ret;
803 }
804
805 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
806 struct mlx5e_tc_flow *flow,
807 struct mlx5_flow_attr *attr,
808 struct mlx5e_encap_entry *e,
809 bool new_encap_entry,
810 unsigned long tbl_time_before,
811 int out_index);
812
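/* Find or create an encap entry for the tunnel destination of this output
 * and attach the flow to it. While the encap is not yet valid the flow is
 * marked SLOW and offloaded through the slow path table.
 */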
813 int mlx5e_attach_encap(struct mlx5e_priv *priv,
814 struct mlx5e_tc_flow *flow,
815 struct mlx5_flow_attr *attr,
816 struct net_device *mirred_dev,
817 int out_index,
818 struct netlink_ext_ack *extack,
819 struct net_device **encap_dev)
820 {
821 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
822 struct mlx5e_tc_flow_parse_attr *parse_attr;
823 const struct ip_tunnel_info *tun_info;
824 const struct mlx5e_mpls_info *mpls_info;
825 unsigned long tbl_time_before = 0;
826 struct mlx5e_encap_entry *e;
827 struct mlx5e_encap_key key;
828 bool entry_created = false;
829 unsigned short family;
830 uintptr_t hash_key;
831 int err = 0;
832
833 lockdep_assert_held(&esw->offloads.encap_tbl_lock);
834
835 parse_attr = attr->parse_attr;
836 tun_info = parse_attr->tun_info[out_index];
837 mpls_info = &parse_attr->mpls_info[out_index];
838 family = ip_tunnel_info_af(tun_info);
839 key.ip_tun_key = &tun_info->key;
840 key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
841 if (!key.tc_tunnel) {
842 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
843 return -EOPNOTSUPP;
844 }
845
846 hash_key = hash_encap_info(&key);
847
848 e = mlx5e_encap_get(priv, &key, hash_key);
849
850 /* must verify if encap is valid or not */
851 if (e) {
852 /* Check that entry was not already attached to this flow */
853 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
854 err = -EOPNOTSUPP;
855 goto out_err;
856 }
857
858 goto attach_flow;
859 }
860
861 e = kzalloc(sizeof(*e), GFP_KERNEL);
862 if (!e) {
863 err = -ENOMEM;
864 goto out_err;
865 }
866
867 refcount_set(&e->refcnt, 1);
868 init_completion(&e->res_ready);
869 entry_created = true;
870 INIT_LIST_HEAD(&e->route_list);
871
872 tun_info = mlx5e_dup_tun_info(tun_info);
873 if (!tun_info) {
874 err = -ENOMEM;
875 goto out_err_init;
876 }
877 e->tun_info = tun_info;
878 memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
879 err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
880 if (err)
881 goto out_err_init;
882
883 INIT_LIST_HEAD(&e->flows);
884 hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
885 tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
886
887 if (family == AF_INET)
888 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
889 else if (family == AF_INET6)
890 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
891
892 complete_all(&e->res_ready);
893 if (err) {
894 e->compl_result = err;
895 goto out_err;
896 }
897 e->compl_result = 1;
898
899 attach_flow:
900 err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
901 tbl_time_before, out_index);
902 if (err)
903 goto out_err;
904
905 err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
906 if (err == -EOPNOTSUPP) {
907 /* If device doesn't support int port offload,
908 * redirect to uplink vport.
909 */
910 mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
911 err = 0;
912 } else if (err) {
913 goto out_err;
914 }
915
916 flow->encaps[out_index].e = e;
917 list_add(&flow->encaps[out_index].list, &e->flows);
918 flow->encaps[out_index].index = out_index;
919 *encap_dev = e->out_dev;
920 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
921 attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
922 attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
923 } else {
924 flow_flag_set(flow, SLOW);
925 }
926
927 return err;
928
929 out_err:
930 if (e)
931 mlx5e_encap_put_locked(priv, e);
932 return err;
933
934 out_err_init:
935 kfree(tun_info);
936 kfree(e);
937 return err;
938 }
939
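/* Find or create a shared L3-tunnel-to-L2 reformat object that restores the
 * flow's inner Ethernet header on decap, and attach the flow to it.
 */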
940 int mlx5e_attach_decap(struct mlx5e_priv *priv,
941 struct mlx5e_tc_flow *flow,
942 struct netlink_ext_ack *extack)
943 {
944 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
945 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
946 struct mlx5_pkt_reformat_params reformat_params;
947 struct mlx5e_decap_entry *d;
948 struct mlx5e_decap_key key;
949 uintptr_t hash_key;
950 int err = 0;
951
952 if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
953 NL_SET_ERR_MSG_MOD(extack,
954 "encap header larger than max supported");
955 return -EOPNOTSUPP;
956 }
957
958 key.key = attr->eth;
959 hash_key = hash_decap_info(&key);
960 mutex_lock(&esw->offloads.decap_tbl_lock);
961 d = mlx5e_decap_get(priv, &key, hash_key);
962 if (d) {
963 mutex_unlock(&esw->offloads.decap_tbl_lock);
964 wait_for_completion(&d->res_ready);
965 mutex_lock(&esw->offloads.decap_tbl_lock);
966 if (d->compl_result) {
967 err = -EREMOTEIO;
968 goto out_free;
969 }
970 goto found;
971 }
972
973 d = kzalloc(sizeof(*d), GFP_KERNEL);
974 if (!d) {
975 err = -ENOMEM;
976 goto out_err;
977 }
978
979 d->key = key;
980 refcount_set(&d->refcnt, 1);
981 init_completion(&d->res_ready);
982 INIT_LIST_HEAD(&d->flows);
983 hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
984 mutex_unlock(&esw->offloads.decap_tbl_lock);
985
986 memset(&reformat_params, 0, sizeof(reformat_params));
987 reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
988 reformat_params.size = sizeof(attr->eth);
989 reformat_params.data = &attr->eth;
990 d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
991 &reformat_params,
992 MLX5_FLOW_NAMESPACE_FDB);
993 if (IS_ERR(d->pkt_reformat)) {
994 err = PTR_ERR(d->pkt_reformat);
995 d->compl_result = err;
996 }
997 mutex_lock(&esw->offloads.decap_tbl_lock);
998 complete_all(&d->res_ready);
999 if (err)
1000 goto out_free;
1001
1002 found:
1003 flow->decap_reformat = d;
1004 attr->decap_pkt_reformat = d->pkt_reformat;
1005 list_add(&flow->l3_to_l2_reformat, &d->flows);
1006 mutex_unlock(&esw->offloads.decap_tbl_lock);
1007 return 0;
1008
1009 out_free:
1010 mutex_unlock(&esw->offloads.decap_tbl_lock);
1011 mlx5e_decap_put(priv, d);
1012 return err;
1013
1014 out_err:
1015 mutex_unlock(&esw->offloads.decap_tbl_lock);
1016 return err;
1017 }
1018
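/* Attach every encap destination of the flow to its encap entry and resolve
 * the destination vports. *vf_tun reports whether any destination is
 * reached through a VF tunnel device.
 */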
1019 int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
1020 struct mlx5e_tc_flow *flow,
1021 struct mlx5_flow_attr *attr,
1022 struct netlink_ext_ack *extack,
1023 bool *vf_tun)
1024 {
1025 struct mlx5e_tc_flow_parse_attr *parse_attr;
1026 struct mlx5_esw_flow_attr *esw_attr;
1027 struct net_device *encap_dev = NULL;
1028 struct mlx5e_rep_priv *rpriv;
1029 struct mlx5e_priv *out_priv;
1030 struct mlx5_eswitch *esw;
1031 int out_index;
1032 int err = 0;
1033
1034 parse_attr = attr->parse_attr;
1035 esw_attr = attr->esw_attr;
1036 *vf_tun = false;
1037
1038 esw = priv->mdev->priv.eswitch;
1039 mutex_lock(&esw->offloads.encap_tbl_lock);
1040 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1041 struct net_device *out_dev;
1042 int mirred_ifindex;
1043
1044 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1045 continue;
1046
1047 mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1048 out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1049 if (!out_dev) {
1050 NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1051 err = -ENODEV;
1052 goto out;
1053 }
1054 err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1055 extack, &encap_dev);
1056 dev_put(out_dev);
1057 if (err)
1058 goto out;
1059
1060 if (esw_attr->dests[out_index].flags &
1061 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1062 !esw_attr->dest_int_port)
1063 *vf_tun = true;
1064
1065 out_priv = netdev_priv(encap_dev);
1066 rpriv = out_priv->ppriv;
1067 esw_attr->dests[out_index].vport_valid = true;
1068 esw_attr->dests[out_index].vport = rpriv->rep->vport;
1069 esw_attr->dests[out_index].mdev = out_priv->mdev;
1070 }
1071
1072 if (*vf_tun && esw_attr->out_count > 1) {
1073 NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1074 err = -EOPNOTSUPP;
1075 goto out;
1076 }
1077
1078 out:
1079 mutex_unlock(&esw->offloads.encap_tbl_lock);
1080 return err;
1081 }
1082
1083 void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
1084 struct mlx5e_tc_flow *flow,
1085 struct mlx5_flow_attr *attr)
1086 {
1087 struct mlx5_esw_flow_attr *esw_attr;
1088 int out_index;
1089
1090 if (!mlx5e_is_eswitch_flow(flow))
1091 return;
1092
1093 esw_attr = attr->esw_attr;
1094
1095 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1096 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1097 continue;
1098
1099 mlx5e_detach_encap(flow->priv, flow, attr, out_index);
1100 kfree(attr->parse_attr->tun_info[out_index]);
1101 }
1102 }
1103
1104 static int cmp_route_info(struct mlx5e_route_key *a,
1105 struct mlx5e_route_key *b)
1106 {
1107 if (a->ip_version == 4 && b->ip_version == 4)
1108 return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1109 sizeof(a->endpoint_ip.v4));
1110 else if (a->ip_version == 6 && b->ip_version == 6)
1111 return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1112 sizeof(a->endpoint_ip.v6));
1113 return 1;
1114 }
1115
1116 static u32 hash_route_info(struct mlx5e_route_key *key)
1117 {
1118 if (key->ip_version == 4)
1119 return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1120 return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1121 }
1122
1123 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1124 struct mlx5e_route_entry *r)
1125 {
1126 WARN_ON(!list_empty(&r->decap_flows));
1127 WARN_ON(!list_empty(&r->encap_entries));
1128
1129 kfree_rcu(r, rcu);
1130 }
1131
1132 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1133 {
1134 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1135
1136 if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1137 return;
1138
1139 hash_del_rcu(&r->hlist);
1140 mutex_unlock(&esw->offloads.encap_tbl_lock);
1141
1142 mlx5e_route_dealloc(priv, r);
1143 }
1144
1145 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1146 {
1147 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1148
1149 lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1150
1151 if (!refcount_dec_and_test(&r->refcnt))
1152 return;
1153 hash_del_rcu(&r->hlist);
1154 mlx5e_route_dealloc(priv, r);
1155 }
1156
1157 static struct mlx5e_route_entry *
1158 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1159 u32 hash_key)
1160 {
1161 struct mlx5e_route_key r_key;
1162 struct mlx5e_route_entry *r;
1163
1164 hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1165 r_key = r->key;
1166 if (!cmp_route_info(&r_key, key) &&
1167 refcount_inc_not_zero(&r->refcnt))
1168 return r;
1169 }
1170 return NULL;
1171 }
1172
1173 static struct mlx5e_route_entry *
1174 mlx5e_route_get_create(struct mlx5e_priv *priv,
1175 struct mlx5e_route_key *key,
1176 int tunnel_dev_index,
1177 unsigned long *route_tbl_change_time)
1178 {
1179 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1180 struct mlx5_rep_uplink_priv *uplink_priv;
1181 struct mlx5e_rep_priv *uplink_rpriv;
1182 struct mlx5e_tc_tun_encap *encap;
1183 struct mlx5e_route_entry *r;
1184 u32 hash_key;
1185
1186 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1187 uplink_priv = &uplink_rpriv->uplink_priv;
1188 encap = uplink_priv->encap;
1189
1190 hash_key = hash_route_info(key);
1191 spin_lock_bh(&encap->route_lock);
1192 r = mlx5e_route_get(encap, key, hash_key);
1193 spin_unlock_bh(&encap->route_lock);
1194 if (r) {
1195 if (!mlx5e_route_entry_valid(r)) {
1196 mlx5e_route_put_locked(priv, r);
1197 return ERR_PTR(-EINVAL);
1198 }
1199 return r;
1200 }
1201
1202 r = kzalloc(sizeof(*r), GFP_KERNEL);
1203 if (!r)
1204 return ERR_PTR(-ENOMEM);
1205
1206 r->key = *key;
1207 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1208 r->tunnel_dev_index = tunnel_dev_index;
1209 refcount_set(&r->refcnt, 1);
1210 INIT_LIST_HEAD(&r->decap_flows);
1211 INIT_LIST_HEAD(&r->encap_entries);
1212
1213 spin_lock_bh(&encap->route_lock);
1214 *route_tbl_change_time = encap->route_tbl_last_update;
1215 hash_add(encap->route_tbl, &r->hlist, hash_key);
1216 spin_unlock_bh(&encap->route_lock);
1217
1218 return r;
1219 }
1220
1221 static struct mlx5e_route_entry *
1222 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1223 {
1224 u32 hash_key = hash_route_info(key);
1225 struct mlx5e_route_entry *r;
1226
1227 spin_lock_bh(&encap->route_lock);
1228 encap->route_tbl_last_update = jiffies;
1229 r = mlx5e_route_get(encap, key, hash_key);
1230 spin_unlock_bh(&encap->route_lock);
1231
1232 return r;
1233 }
1234
1235 struct mlx5e_tc_fib_event_data {
1236 struct work_struct work;
1237 unsigned long event;
1238 struct mlx5e_route_entry *r;
1239 struct net_device *ul_dev;
1240 };
1241
1242 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1243 static struct mlx5e_tc_fib_event_data *
1244 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1245 {
1246 struct mlx5e_tc_fib_event_data *fib_work;
1247
1248 fib_work = kzalloc(sizeof(*fib_work), flags);
1249 if (WARN_ON(!fib_work))
1250 return NULL;
1251
1252 INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1253 fib_work->event = event;
1254 fib_work->ul_dev = ul_dev;
1255
1256 return fib_work;
1257 }
1258
1259 static int
1260 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1261 struct mlx5e_route_entry *r,
1262 unsigned long event)
1263 {
1264 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1265 struct mlx5e_tc_fib_event_data *fib_work;
1266 struct mlx5e_rep_priv *uplink_rpriv;
1267 struct net_device *ul_dev;
1268
1269 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1270 ul_dev = uplink_rpriv->netdev;
1271
1272 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1273 if (!fib_work)
1274 return -ENOMEM;
1275
1276 dev_hold(ul_dev);
1277 refcount_inc(&r->refcnt);
1278 fib_work->r = r;
1279 queue_work(priv->wq, &fib_work->work);
1280
1281 return 0;
1282 }
1283
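/* Look up the tunnel route used to decap this flow and add the flow to the
 * matching cached route entry. If the route table changed while the lookup
 * was in progress, schedule a route update so no FIB event is missed.
 */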
1284 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1285 struct mlx5e_tc_flow *flow)
1286 {
1287 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1288 unsigned long tbl_time_before, tbl_time_after;
1289 struct mlx5e_tc_flow_parse_attr *parse_attr;
1290 struct mlx5_flow_attr *attr = flow->attr;
1291 struct mlx5_esw_flow_attr *esw_attr;
1292 struct mlx5e_route_entry *r;
1293 struct mlx5e_route_key key;
1294 int err = 0;
1295
1296 esw_attr = attr->esw_attr;
1297 parse_attr = attr->parse_attr;
1298 mutex_lock(&esw->offloads.encap_tbl_lock);
1299 if (!esw_attr->rx_tun_attr)
1300 goto out;
1301
1302 tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1303 tbl_time_after = tbl_time_before;
1304 err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1305 if (err || !esw_attr->rx_tun_attr->decap_vport)
1306 goto out;
1307
1308 key.ip_version = attr->tun_ip_version;
1309 if (key.ip_version == 4)
1310 key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1311 else
1312 key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1313
1314 r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1315 &tbl_time_after);
1316 if (IS_ERR(r)) {
1317 err = PTR_ERR(r);
1318 goto out;
1319 }
1320 /* Routing changed concurrently. FIB event handler might have missed new
1321 * entry, schedule update.
1322 */
1323 if (tbl_time_before != tbl_time_after) {
1324 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1325 if (err) {
1326 mlx5e_route_put_locked(priv, r);
1327 goto out;
1328 }
1329 }
1330
1331 flow->decap_route = r;
1332 list_add(&flow->decap_routes, &r->decap_flows);
1333 mutex_unlock(&esw->offloads.encap_tbl_lock);
1334 return 0;
1335
1336 out:
1337 mutex_unlock(&esw->offloads.encap_tbl_lock);
1338 return err;
1339 }
1340
1341 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1342 struct mlx5e_tc_flow *flow,
1343 struct mlx5_flow_attr *attr,
1344 struct mlx5e_encap_entry *e,
1345 bool new_encap_entry,
1346 unsigned long tbl_time_before,
1347 int out_index)
1348 {
1349 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1350 unsigned long tbl_time_after = tbl_time_before;
1351 struct mlx5e_tc_flow_parse_attr *parse_attr;
1352 const struct ip_tunnel_info *tun_info;
1353 struct mlx5_esw_flow_attr *esw_attr;
1354 struct mlx5e_route_entry *r;
1355 struct mlx5e_route_key key;
1356 unsigned short family;
1357 int err = 0;
1358
1359 esw_attr = attr->esw_attr;
1360 parse_attr = attr->parse_attr;
1361 tun_info = parse_attr->tun_info[out_index];
1362 family = ip_tunnel_info_af(tun_info);
1363
1364 if (family == AF_INET) {
1365 key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1366 key.ip_version = 4;
1367 } else if (family == AF_INET6) {
1368 key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1369 key.ip_version = 6;
1370 }
1371
1372 err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1373 e->route_dev_ifindex, out_index);
1374 if (err || !(esw_attr->dests[out_index].flags &
1375 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1376 return err;
1377
1378 r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1379 &tbl_time_after);
1380 if (IS_ERR(r))
1381 return PTR_ERR(r);
1382 /* Routing changed concurrently. FIB event handler might have missed new
1383 * entry, schedule update.
1384 */
1385 if (tbl_time_before != tbl_time_after) {
1386 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1387 if (err) {
1388 mlx5e_route_put_locked(priv, r);
1389 return err;
1390 }
1391 }
1392
1393 flow->encap_routes[out_index].r = r;
1394 if (new_encap_entry)
1395 list_add(&e->route_list, &r->encap_entries);
1396 flow->encap_routes[out_index].index = out_index;
1397 return 0;
1398 }
1399
1400 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1401 struct mlx5e_tc_flow *flow)
1402 {
1403 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1404 struct mlx5e_route_entry *r = flow->decap_route;
1405
1406 if (!r)
1407 return;
1408
1409 mutex_lock(&esw->offloads.encap_tbl_lock);
1410 list_del(&flow->decap_routes);
1411 flow->decap_route = NULL;
1412
1413 if (!refcount_dec_and_test(&r->refcnt)) {
1414 mutex_unlock(&esw->offloads.encap_tbl_lock);
1415 return;
1416 }
1417 hash_del_rcu(&r->hlist);
1418 mutex_unlock(&esw->offloads.encap_tbl_lock);
1419
1420 mlx5e_route_dealloc(priv, r);
1421 }
1422
1423 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1424 struct mlx5e_tc_flow *flow,
1425 int out_index)
1426 {
1427 struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1428 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1429 struct mlx5e_encap_entry *e, *tmp;
1430
1431 if (!r)
1432 return;
1433
1434 mutex_lock(&esw->offloads.encap_tbl_lock);
1435 flow->encap_routes[out_index].r = NULL;
1436
1437 if (!refcount_dec_and_test(&r->refcnt)) {
1438 mutex_unlock(&esw->offloads.encap_tbl_lock);
1439 return;
1440 }
1441 list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1442 list_del_init(&e->route_list);
1443 hash_del_rcu(&r->hlist);
1444 mutex_unlock(&esw->offloads.encap_tbl_lock);
1445
1446 mlx5e_route_dealloc(priv, r);
1447 }
1448
1449 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1450 struct mlx5e_encap_entry *e,
1451 struct list_head *encap_flows)
1452 {
1453 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1454 struct mlx5e_tc_flow *flow;
1455
1456 list_for_each_entry(flow, encap_flows, tmp_list) {
1457 struct mlx5_esw_flow_attr *esw_attr;
1458 struct mlx5_flow_attr *attr;
1459
1460 if (!mlx5e_is_offloaded_flow(flow))
1461 continue;
1462
1463 attr = mlx5e_tc_get_encap_attr(flow);
1464 esw_attr = attr->esw_attr;
1465
1466 if (flow_flag_test(flow, SLOW)) {
1467 mlx5e_tc_unoffload_from_slow_path(esw, flow);
1468 } else {
1469 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1470 mlx5e_tc_unoffload_flow_post_acts(flow);
1471 }
1472
1473 mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1474 attr->modify_hdr = NULL;
1475
1476 esw_attr->dests[flow->tmp_entry_index].flags &=
1477 ~MLX5_ESW_DEST_ENCAP_VALID;
1478 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1479 }
1480
1481 e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1482 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1483 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1484 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1485 e->pkt_reformat = NULL;
1486 }
1487 }
1488
1489 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1490 struct net_device *tunnel_dev,
1491 struct mlx5e_encap_entry *e,
1492 struct list_head *encap_flows)
1493 {
1494 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1495 struct mlx5e_tc_flow *flow;
1496 int err;
1497
1498 err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1499 mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1500 mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1501 if (err)
1502 mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1503 e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1504
1505 list_for_each_entry(flow, encap_flows, tmp_list) {
1506 struct mlx5e_tc_flow_parse_attr *parse_attr;
1507 struct mlx5_esw_flow_attr *esw_attr;
1508 struct mlx5_flow_handle *rule;
1509 struct mlx5_flow_attr *attr;
1510 struct mlx5_flow_spec *spec;
1511
1512 if (flow_flag_test(flow, FAILED))
1513 continue;
1514
1515 spec = &flow->attr->parse_attr->spec;
1516
1517 attr = mlx5e_tc_get_encap_attr(flow);
1518 esw_attr = attr->esw_attr;
1519 parse_attr = attr->parse_attr;
1520
1521 err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1522 e->out_dev, e->route_dev_ifindex,
1523 flow->tmp_entry_index);
1524 if (err) {
1525 mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1526 continue;
1527 }
1528
1529 err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1530 if (err) {
1531 mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1532 err);
1533 continue;
1534 }
1535
1536 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1537 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1538 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1539 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1540 goto offload_to_slow_path;
1541
1542 err = mlx5e_tc_offload_flow_post_acts(flow);
1543 if (err) {
1544 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1545 err);
1546 goto offload_to_slow_path;
1547 }
1548
1549 /* update from slow path rule to encap rule */
1550 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1551 if (IS_ERR(rule)) {
1552 mlx5e_tc_unoffload_flow_post_acts(flow);
1553 err = PTR_ERR(rule);
1554 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1555 err);
1556 } else {
1557 flow->rule[0] = rule;
1558 }
1559 } else {
1560 offload_to_slow_path:
1561 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1562 /* mark the flow's encap dest as non-valid */
1563 esw_attr->dests[flow->tmp_entry_index].flags &=
1564 ~MLX5_ESW_DEST_ENCAP_VALID;
1565
1566 if (IS_ERR(rule)) {
1567 err = PTR_ERR(rule);
1568 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1569 err);
1570 } else {
1571 flow->rule[0] = rule;
1572 }
1573 }
1574 flow_flag_set(flow, OFFLOADED);
1575 }
1576 }
1577
1578 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1579 struct mlx5e_route_entry *r,
1580 struct list_head *flow_list,
1581 bool replace)
1582 {
1583 struct net_device *tunnel_dev;
1584 struct mlx5e_encap_entry *e;
1585
1586 tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1587 if (!tunnel_dev)
1588 return -ENODEV;
1589
1590 list_for_each_entry(e, &r->encap_entries, route_list) {
1591 LIST_HEAD(encap_flows);
1592
1593 mlx5e_take_all_encap_flows(e, &encap_flows);
1594 if (list_empty(&encap_flows))
1595 continue;
1596
1597 if (mlx5e_route_entry_valid(r))
1598 mlx5e_invalidate_encap(priv, e, &encap_flows);
1599
1600 if (!replace) {
1601 list_splice(&encap_flows, flow_list);
1602 continue;
1603 }
1604
1605 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1606 list_splice(&encap_flows, flow_list);
1607 }
1608
1609 return 0;
1610 }
1611
1612 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1613 struct list_head *flow_list)
1614 {
1615 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1616 struct mlx5e_tc_flow *flow;
1617
1618 list_for_each_entry(flow, flow_list, tmp_list)
1619 if (mlx5e_is_offloaded_flow(flow))
1620 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1621 }
1622
1623 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1624 struct list_head *decap_flows)
1625 {
1626 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1627 struct mlx5e_tc_flow *flow;
1628
1629 list_for_each_entry(flow, decap_flows, tmp_list) {
1630 struct mlx5e_tc_flow_parse_attr *parse_attr;
1631 struct mlx5_flow_attr *attr = flow->attr;
1632 struct mlx5_flow_handle *rule;
1633 struct mlx5_flow_spec *spec;
1634 int err;
1635
1636 if (flow_flag_test(flow, FAILED))
1637 continue;
1638
1639 parse_attr = attr->parse_attr;
1640 spec = &parse_attr->spec;
1641 err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1642 if (err) {
1643 mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1644 err);
1645 continue;
1646 }
1647
1648 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1649 if (IS_ERR(rule)) {
1650 err = PTR_ERR(rule);
1651 mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1652 err);
1653 } else {
1654 flow->rule[0] = rule;
1655 flow_flag_set(flow, OFFLOADED);
1656 }
1657 }
1658 }
1659
1660 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1661 struct mlx5e_route_entry *r,
1662 struct list_head *flow_list,
1663 bool replace)
1664 {
1665 struct net_device *tunnel_dev;
1666 LIST_HEAD(decap_flows);
1667
1668 tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1669 if (!tunnel_dev)
1670 return -ENODEV;
1671
1672 mlx5e_take_all_route_decap_flows(r, &decap_flows);
1673 if (mlx5e_route_entry_valid(r))
1674 mlx5e_unoffload_flow_list(priv, &decap_flows);
1675 if (replace)
1676 mlx5e_reoffload_decap(priv, &decap_flows);
1677
1678 list_splice(&decap_flows, flow_list);
1679
1680 return 0;
1681 }
1682
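/* Work item handling a single FIB event for a cached route entry: re-resolve
 * encap headers and decap flows, re-offloading them when the route is
 * replaced and unoffloading them when it is deleted.
 */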
1683 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1684 {
1685 struct mlx5e_tc_fib_event_data *event_data =
1686 container_of(work, struct mlx5e_tc_fib_event_data, work);
1687 struct net_device *ul_dev = event_data->ul_dev;
1688 struct mlx5e_priv *priv = netdev_priv(ul_dev);
1689 struct mlx5e_route_entry *r = event_data->r;
1690 struct mlx5_eswitch *esw;
1691 LIST_HEAD(flow_list);
1692 bool replace;
1693 int err;
1694
1695 /* sync with concurrent neigh updates */
1696 rtnl_lock();
1697 esw = priv->mdev->priv.eswitch;
1698 mutex_lock(&esw->offloads.encap_tbl_lock);
1699 replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1700
1701 if (!mlx5e_route_entry_valid(r) && !replace)
1702 goto out;
1703
1704 err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1705 if (err)
1706 mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1707 err);
1708
1709 err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1710 if (err)
1711 mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1712 err);
1713
1714 if (replace)
1715 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1716 out:
1717 mutex_unlock(&esw->offloads.encap_tbl_lock);
1718 rtnl_unlock();
1719
1720 mlx5e_put_flow_list(priv, &flow_list);
1721 mlx5e_route_put(priv, event_data->r);
1722 dev_put(event_data->ul_dev);
1723 kfree(event_data);
1724 }
1725
1726 static struct mlx5e_tc_fib_event_data *
1727 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1728 struct net_device *ul_dev,
1729 struct mlx5e_tc_tun_encap *encap,
1730 unsigned long event,
1731 struct fib_notifier_info *info)
1732 {
1733 struct fib_entry_notifier_info *fen_info;
1734 struct mlx5e_tc_fib_event_data *fib_work;
1735 struct mlx5e_route_entry *r;
1736 struct mlx5e_route_key key;
1737 struct net_device *fib_dev;
1738
1739 fen_info = container_of(info, struct fib_entry_notifier_info, info);
1740 if (fen_info->fi->nh)
1741 return NULL;
1742 fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1743 if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1744 fen_info->dst_len != 32)
1745 return NULL;
1746
1747 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1748 if (!fib_work)
1749 return ERR_PTR(-ENOMEM);
1750
1751 key.endpoint_ip.v4 = htonl(fen_info->dst);
1752 key.ip_version = 4;
1753
1754 /* Can't fail after this point because releasing reference to r
1755 * requires obtaining sleeping mutex which we can't do in atomic
1756 * context.
1757 */
1758 r = mlx5e_route_lookup_for_update(encap, &key);
1759 if (!r)
1760 goto out;
1761 fib_work->r = r;
1762 dev_hold(ul_dev);
1763
1764 return fib_work;
1765
1766 out:
1767 kfree(fib_work);
1768 return NULL;
1769 }
1770
1771 static struct mlx5e_tc_fib_event_data *
1772 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1773 struct net_device *ul_dev,
1774 struct mlx5e_tc_tun_encap *encap,
1775 unsigned long event,
1776 struct fib_notifier_info *info)
1777 {
1778 struct fib6_entry_notifier_info *fen_info;
1779 struct mlx5e_tc_fib_event_data *fib_work;
1780 struct mlx5e_route_entry *r;
1781 struct mlx5e_route_key key;
1782 struct net_device *fib_dev;
1783
1784 fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1785 fib_dev = fib6_info_nh_dev(fen_info->rt);
1786 if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1787 fen_info->rt->fib6_dst.plen != 128)
1788 return NULL;
1789
1790 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1791 if (!fib_work)
1792 return ERR_PTR(-ENOMEM);
1793
1794 memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1795 sizeof(fen_info->rt->fib6_dst.addr));
1796 key.ip_version = 6;
1797
1798 /* Can't fail after this point because releasing reference to r
1799 * requires obtaining sleeping mutex which we can't do in atomic
1800 * context.
1801 */
1802 r = mlx5e_route_lookup_for_update(encap, &key);
1803 if (!r)
1804 goto out;
1805 fib_work->r = r;
1806 dev_hold(ul_dev);
1807
1808 return fib_work;
1809
1810 out:
1811 kfree(fib_work);
1812 return NULL;
1813 }
1814
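/* FIB notifier callback, runs in atomic context: filter for exact-host
 * routes whose nexthop device is an mlx5e netdev and queue a work item;
 * the actual flow updates run in process context.
 */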
1815 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1816 {
1817 struct mlx5e_tc_fib_event_data *fib_work;
1818 struct fib_notifier_info *info = ptr;
1819 struct mlx5e_tc_tun_encap *encap;
1820 struct net_device *ul_dev;
1821 struct mlx5e_priv *priv;
1822
1823 encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1824 priv = encap->priv;
1825 ul_dev = priv->netdev;
1826 priv = netdev_priv(ul_dev);
1827
1828 switch (event) {
1829 case FIB_EVENT_ENTRY_REPLACE:
1830 case FIB_EVENT_ENTRY_DEL:
1831 if (info->family == AF_INET)
1832 fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1833 else if (info->family == AF_INET6)
1834 fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1835 else
1836 return NOTIFY_DONE;
1837
1838 if (!IS_ERR_OR_NULL(fib_work)) {
1839 queue_work(priv->wq, &fib_work->work);
1840 } else if (IS_ERR(fib_work)) {
1841 NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1842 mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1843 PTR_ERR(fib_work));
1844 }
1845
1846 break;
1847 default:
1848 return NOTIFY_DONE;
1849 }
1850
1851 return NOTIFY_DONE;
1852 }
1853
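/* Allocate the per-uplink tunnel offload context and register the FIB
 * notifier that keeps offloaded tunnel routes in sync with the kernel
 * routing tables.
 */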
1854 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1855 {
1856 struct mlx5e_tc_tun_encap *encap;
1857 int err;
1858
1859 encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1860 if (!encap)
1861 return ERR_PTR(-ENOMEM);
1862
1863 encap->priv = priv;
1864 encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1865 spin_lock_init(&encap->route_lock);
1866 hash_init(encap->route_tbl);
1867 err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1868 NULL, NULL);
1869 if (err) {
1870 kvfree(encap);
1871 return ERR_PTR(err);
1872 }
1873
1874 return encap;
1875 }
1876
1877 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1878 {
1879 if (!encap)
1880 return;
1881
1882 unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1883 flush_workqueue(encap->priv->wq); /* flush fib event works */
1884 kvfree(encap);
1885 }
1886