1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3
4 #include <linux/mlx5/fs.h>
5 #include "en/mapping.h"
6 #include "en/tc/int_port.h"
7 #include "en.h"
8 #include "en_rep.h"
9 #include "en_tc.h"
10
11 struct mlx5e_tc_int_port {
12 enum mlx5e_tc_int_port_type type;
13 int ifindex;
14 u32 match_metadata;
15 u32 mapping;
16 struct list_head list;
17 struct mlx5_flow_handle *rx_rule;
18 refcount_t refcnt;
19 struct rcu_head rcu_head;
20 };
21
22 struct mlx5e_tc_int_port_priv {
23 struct mlx5_core_dev *dev;
24 struct mutex int_ports_lock; /* Protects int ports list */
25 struct list_head int_ports; /* Uses int_ports_lock */
26 u16 num_ports;
27 bool ul_rep_rx_ready; /* Set when uplink is performing teardown */
28 struct mapping_ctx *metadata_mapping; /* Metadata for source port rewrite and matching */
29 };
30
mlx5e_tc_int_port_supported(const struct mlx5_eswitch * esw)31 bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
32 {
33 return mlx5_eswitch_vport_match_metadata_enabled(esw) &&
34 MLX5_CAP_GEN(esw->dev, reg_c_preserve);
35 }
36
mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port * int_port)37 u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port)
38 {
39 return int_port->match_metadata;
40 }
41
mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port * int_port)42 int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
43 {
44 /* For egress forwarding we can have the case
45 * where the packet came from a vport and redirected
46 * to int port or it came from the uplink, going
47 * via internal port and hairpinned back to uplink
48 * so we set the source to any port in this case.
49 */
50 return int_port->type == MLX5E_TC_INT_PORT_EGRESS ?
51 MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT :
52 MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
53 }
54
mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port * int_port)55 u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
56 {
57 return int_port->match_metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
58 }
59
60 static struct mlx5_flow_handle *
mlx5e_int_port_create_rx_rule(struct mlx5_eswitch * esw,struct mlx5e_tc_int_port * int_port,struct mlx5_flow_destination * dest)61 mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw,
62 struct mlx5e_tc_int_port *int_port,
63 struct mlx5_flow_destination *dest)
64
65 {
66 struct mlx5_flow_context *flow_context;
67 struct mlx5_flow_act flow_act = {};
68 struct mlx5_flow_handle *flow_rule;
69 struct mlx5_flow_spec *spec;
70 void *misc;
71
72 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
73 if (!spec)
74 return ERR_PTR(-ENOMEM);
75
76 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
77 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
78 mlx5e_tc_int_port_get_metadata_for_match(int_port));
79
80 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
81 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
82 mlx5_eswitch_get_vport_metadata_mask());
83
84 spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
85
86 /* Overwrite flow tag with the int port metadata mapping
87 * instead of the chain mapping.
88 */
89 flow_context = &spec->flow_context;
90 flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
91 flow_context->flow_tag = int_port->mapping;
92 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
93 flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
94 &flow_act, dest, 1);
95 if (IS_ERR(flow_rule))
96 mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n",
97 PTR_ERR(flow_rule));
98
99 kvfree(spec);
100
101 return flow_rule;
102 }
103
104 static struct mlx5e_tc_int_port *
mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv * priv,int ifindex,enum mlx5e_tc_int_port_type type)105 mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv *priv,
106 int ifindex,
107 enum mlx5e_tc_int_port_type type)
108 {
109 struct mlx5e_tc_int_port *int_port;
110
111 if (!priv->ul_rep_rx_ready)
112 goto not_found;
113
114 list_for_each_entry(int_port, &priv->int_ports, list)
115 if (int_port->ifindex == ifindex && int_port->type == type) {
116 refcount_inc(&int_port->refcnt);
117 return int_port;
118 }
119
120 not_found:
121 return NULL;
122 }
123
/* Allocate a metadata id for the (@type, @ifindex) pair.
 *
 * On success *@id holds the mapping id with the reserved value 0xf set
 * in the bits right above ESW_VPORT_BITS. Returns 0 or the error
 * reported by mapping_add().
 */
static int mlx5e_int_port_metadata_alloc(struct mlx5e_tc_int_port_priv *priv,
					 int ifindex, enum mlx5e_tc_int_port_type type,
					 u32 *id)
{
	u32 mapped_key[2] = {type, ifindex};
	int err;

	err = mapping_add(priv->metadata_mapping, mapped_key, id);
	if (!err) {
		/* Fill upper 4 bits of PFNUM with reserved value */
		*id |= 0xf << ESW_VPORT_BITS;
	}

	return err;
}
140
/* Release a metadata id obtained from mlx5e_int_port_metadata_alloc().
 * Only the low ESW_VPORT_BITS bits of @id are the mapping id; the bits
 * above them are masked off before the mapping is removed.
 */
static void mlx5e_int_port_metadata_free(struct mlx5e_tc_int_port_priv *priv,
					 u32 id)
{
	u32 mapping_id = id & ((1 << ESW_VPORT_BITS) - 1);

	mapping_remove(priv->metadata_mapping, mapping_id);
}
147
148 /* Must be called with priv->int_ports_lock held */
149 static struct mlx5e_tc_int_port *
mlx5e_int_port_add(struct mlx5e_tc_int_port_priv * priv,int ifindex,enum mlx5e_tc_int_port_type type)150 mlx5e_int_port_add(struct mlx5e_tc_int_port_priv *priv,
151 int ifindex,
152 enum mlx5e_tc_int_port_type type)
153 {
154 struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
155 struct mlx5_mapped_obj mapped_obj = {};
156 struct mlx5e_rep_priv *uplink_rpriv;
157 struct mlx5e_tc_int_port *int_port;
158 struct mlx5_flow_destination dest;
159 struct mapping_ctx *ctx;
160 u32 match_metadata;
161 u32 mapping;
162 int err;
163
164 if (priv->num_ports == MLX5E_TC_MAX_INT_PORT_NUM) {
165 mlx5_core_dbg(priv->dev, "Cannot add a new int port, max supported %d",
166 MLX5E_TC_MAX_INT_PORT_NUM);
167 return ERR_PTR(-ENOSPC);
168 }
169
170 int_port = kzalloc(sizeof(*int_port), GFP_KERNEL);
171 if (!int_port)
172 return ERR_PTR(-ENOMEM);
173
174 err = mlx5e_int_port_metadata_alloc(priv, ifindex, type, &match_metadata);
175 if (err) {
176 mlx5_core_warn(esw->dev, "Cannot add a new internal port, metadata allocation failed for ifindex %d",
177 ifindex);
178 goto err_metadata;
179 }
180
181 /* map metadata to reg_c0 object for miss handling */
182 ctx = esw->offloads.reg_c0_obj_pool;
183 mapped_obj.type = MLX5_MAPPED_OBJ_INT_PORT_METADATA;
184 mapped_obj.int_port_metadata = match_metadata;
185 err = mapping_add(ctx, &mapped_obj, &mapping);
186 if (err)
187 goto err_map;
188
189 int_port->type = type;
190 int_port->ifindex = ifindex;
191 int_port->match_metadata = match_metadata;
192 int_port->mapping = mapping;
193
194 /* Create a match on internal vport metadata in vport table */
195 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
196
197 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
198 dest.ft = uplink_rpriv->root_ft;
199
200 int_port->rx_rule = mlx5e_int_port_create_rx_rule(esw, int_port, &dest);
201 if (IS_ERR(int_port->rx_rule)) {
202 err = PTR_ERR(int_port->rx_rule);
203 mlx5_core_warn(esw->dev, "Can't add internal port rx rule, err %d", err);
204 goto err_rx_rule;
205 }
206
207 refcount_set(&int_port->refcnt, 1);
208 list_add_rcu(&int_port->list, &priv->int_ports);
209 priv->num_ports++;
210
211 return int_port;
212
213 err_rx_rule:
214 mapping_remove(ctx, int_port->mapping);
215
216 err_map:
217 mlx5e_int_port_metadata_free(priv, match_metadata);
218
219 err_metadata:
220 kfree(int_port);
221
222 return ERR_PTR(err);
223 }
224
225 /* Must be called with priv->int_ports_lock held */
226 static void
mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv * priv,struct mlx5e_tc_int_port * int_port)227 mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv,
228 struct mlx5e_tc_int_port *int_port)
229 {
230 struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
231 struct mapping_ctx *ctx;
232
233 ctx = esw->offloads.reg_c0_obj_pool;
234
235 list_del_rcu(&int_port->list);
236
237 /* The following parameters are not used by the
238 * rcu readers of this int_port object so it is
239 * safe to release them.
240 */
241 if (int_port->rx_rule)
242 mlx5_del_flow_rules(int_port->rx_rule);
243 mapping_remove(ctx, int_port->mapping);
244 mlx5e_int_port_metadata_free(priv, int_port->match_metadata);
245 kfree_rcu(int_port);
246 priv->num_ports--;
247 }
248
249 /* Must be called with rcu_read_lock held */
250 static struct mlx5e_tc_int_port *
mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv * priv,u32 metadata)251 mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv *priv,
252 u32 metadata)
253 {
254 struct mlx5e_tc_int_port *int_port;
255
256 list_for_each_entry_rcu(int_port, &priv->int_ports, list)
257 if (int_port->match_metadata == metadata)
258 return int_port;
259
260 return NULL;
261 }
262
263 struct mlx5e_tc_int_port *
mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv * priv,int ifindex,enum mlx5e_tc_int_port_type type)264 mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
265 int ifindex,
266 enum mlx5e_tc_int_port_type type)
267 {
268 struct mlx5e_tc_int_port *int_port;
269
270 if (!priv)
271 return ERR_PTR(-EOPNOTSUPP);
272
273 mutex_lock(&priv->int_ports_lock);
274
275 /* Reject request if ul rep not ready */
276 if (!priv->ul_rep_rx_ready) {
277 int_port = ERR_PTR(-EOPNOTSUPP);
278 goto done;
279 }
280
281 int_port = mlx5e_int_port_lookup(priv, ifindex, type);
282 if (int_port)
283 goto done;
284
285 /* Alloc and add new int port to list */
286 int_port = mlx5e_int_port_add(priv, ifindex, type);
287
288 done:
289 mutex_unlock(&priv->int_ports_lock);
290
291 return int_port;
292 }
293
294 void
mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv * priv,struct mlx5e_tc_int_port * int_port)295 mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
296 struct mlx5e_tc_int_port *int_port)
297 {
298 if (!refcount_dec_and_mutex_lock(&int_port->refcnt, &priv->int_ports_lock))
299 return;
300
301 mlx5e_int_port_remove(priv, int_port);
302 mutex_unlock(&priv->int_ports_lock);
303 }
304
305 struct mlx5e_tc_int_port_priv *
mlx5e_tc_int_port_init(struct mlx5e_priv * priv)306 mlx5e_tc_int_port_init(struct mlx5e_priv *priv)
307 {
308 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
309 struct mlx5e_tc_int_port_priv *int_port_priv;
310 u64 mapping_id;
311
312 if (!mlx5e_tc_int_port_supported(esw))
313 return NULL;
314
315 int_port_priv = kzalloc(sizeof(*int_port_priv), GFP_KERNEL);
316 if (!int_port_priv)
317 return NULL;
318
319 mapping_id = mlx5_query_nic_system_image_guid(priv->mdev);
320
321 int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT,
322 sizeof(u32) * 2,
323 (1 << ESW_VPORT_BITS) - 1, true);
324 if (IS_ERR(int_port_priv->metadata_mapping)) {
325 mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%ld\n",
326 PTR_ERR(int_port_priv->metadata_mapping));
327 goto err_mapping;
328 }
329
330 int_port_priv->dev = priv->mdev;
331 mutex_init(&int_port_priv->int_ports_lock);
332 INIT_LIST_HEAD(&int_port_priv->int_ports);
333
334 return int_port_priv;
335
336 err_mapping:
337 kfree(int_port_priv);
338
339 return NULL;
340 }
341
342 void
mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv * priv)343 mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv)
344 {
345 if (!priv)
346 return;
347
348 mutex_destroy(&priv->int_ports_lock);
349 mapping_destroy(priv->metadata_mapping);
350 kfree(priv);
351 }
352
353 /* Int port rx rules reside in ul rep rx tables.
354 * It is possible the ul rep will go down while there are
355 * still int port rules in its rx table so proper cleanup
356 * is required to free resources.
357 */
mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv * priv)358 void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv)
359 {
360 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
361 struct mlx5_rep_uplink_priv *uplink_priv;
362 struct mlx5e_tc_int_port_priv *ppriv;
363 struct mlx5e_rep_priv *uplink_rpriv;
364
365 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
366 uplink_priv = &uplink_rpriv->uplink_priv;
367
368 ppriv = uplink_priv->int_port_priv;
369
370 if (!ppriv)
371 return;
372
373 mutex_lock(&ppriv->int_ports_lock);
374 ppriv->ul_rep_rx_ready = true;
375 mutex_unlock(&ppriv->int_ports_lock);
376 }
377
mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv * priv)378 void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv)
379 {
380 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
381 struct mlx5_rep_uplink_priv *uplink_priv;
382 struct mlx5e_tc_int_port_priv *ppriv;
383 struct mlx5e_rep_priv *uplink_rpriv;
384 struct mlx5e_tc_int_port *int_port;
385
386 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
387 uplink_priv = &uplink_rpriv->uplink_priv;
388
389 ppriv = uplink_priv->int_port_priv;
390
391 if (!ppriv)
392 return;
393
394 mutex_lock(&ppriv->int_ports_lock);
395
396 ppriv->ul_rep_rx_ready = false;
397
398 list_for_each_entry(int_port, &ppriv->int_ports, list) {
399 if (!IS_ERR_OR_NULL(int_port->rx_rule))
400 mlx5_del_flow_rules(int_port->rx_rule);
401
402 int_port->rx_rule = NULL;
403 }
404
405 mutex_unlock(&ppriv->int_ports_lock);
406 }
407
408 bool
mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv * priv,struct sk_buff * skb,u32 int_vport_metadata,bool * forward_tx)409 mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
410 struct sk_buff *skb, u32 int_vport_metadata,
411 bool *forward_tx)
412 {
413 enum mlx5e_tc_int_port_type fwd_type;
414 struct mlx5e_tc_int_port *int_port;
415 struct net_device *dev;
416 int ifindex;
417
418 if (!priv)
419 return false;
420
421 rcu_read_lock();
422 int_port = mlx5e_int_port_get_from_metadata(priv, int_vport_metadata);
423 if (!int_port) {
424 rcu_read_unlock();
425 mlx5_core_dbg(priv->dev, "Unable to find int port with metadata 0x%.8x\n",
426 int_vport_metadata);
427 return false;
428 }
429
430 ifindex = int_port->ifindex;
431 fwd_type = int_port->type;
432 rcu_read_unlock();
433
434 dev = dev_get_by_index(&init_net, ifindex);
435 if (!dev) {
436 mlx5_core_dbg(priv->dev,
437 "Couldn't find internal port device with ifindex: %d\n",
438 ifindex);
439 return false;
440 }
441
442 skb->skb_iif = dev->ifindex;
443 skb->dev = dev;
444
445 if (fwd_type == MLX5E_TC_INT_PORT_INGRESS) {
446 skb->pkt_type = PACKET_HOST;
447 skb_set_redirected(skb, true);
448 *forward_tx = false;
449 } else {
450 skb_reset_network_header(skb);
451 skb_push_rcsum(skb, skb->mac_len);
452 skb_set_redirected(skb, false);
453 *forward_tx = true;
454 }
455
456 return true;
457 }
458