// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */

#include <linux/bpf_trace.h>
#include <linux/netdevice.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/bitfield.h>

#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../crypto/crypto.h"
#include "../crypto/fw.h"
#include "nfdk.h"

static int nfp_nfdk_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	return !nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT * 2);
}

static int nfp_nfdk_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	return nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT);
}

static void nfp_nfdk_tx_ring_stop(struct netdev_queue *nd_q,
				  struct nfp_net_tx_ring *tx_ring)
{
	netif_tx_stop_queue(nd_q);

	/* We can race with the TX completion out of NAPI so recheck */
	smp_mb();
	if (unlikely(nfp_nfdk_tx_ring_should_wake(tx_ring)))
		netif_tx_start_queue(nd_q);
}

static __le64
nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf,
		struct sk_buff *skb)
{
	u32 segs, hdrlen, l3_offset, l4_offset;
	struct nfp_nfdk_tx_desc txd;
	u16 mss;

	if (!skb->encapsulation) {
		l3_offset = skb_network_offset(skb);
		l4_offset = skb_transport_offset(skb);
		hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
	} else {
		l3_offset = skb_inner_network_offset(skb);
		l4_offset = skb_inner_transport_offset(skb);
		hdrlen = skb_inner_transport_header(skb) - skb->data +
			 inner_tcp_hdrlen(skb);
	}

	segs = skb_shinfo(skb)->gso_segs;
	mss = skb_shinfo(skb)->gso_size & NFDK_DESC_TX_MSS_MASK;

	/* Note: TSO of a packet with metadata prepended to the skb is not
	 * supported yet, in which case l3/l4_offset and lso_hdrlen need to
	 * be handled correctly here.
	 * Concern:
	 * The driver doesn't have md_bytes easily available at this point.
	 * The PCI.IN PD ME won't have md_bytes bytes to add to lso_hdrlen,
	 * so it needs the full length there. The app MEs might prefer
	 * l3_offset and l4_offset relative to the start of packet data,
	 * but could probably cope with it being relative to the CTM buf
	 * data offset.
	 */
	txd.l3_offset = l3_offset;
	txd.l4_offset = l4_offset;
	txd.lso_meta_res = 0;
	txd.mss = cpu_to_le16(mss);
	txd.lso_hdrlen = hdrlen;
	txd.lso_totsegs = segs;

	txbuf->pkt_cnt = segs;
	txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1);
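	/* Illustrative example (values assumed, not from the spec): for a
	 * TSO skb of len 2950 with hdrlen 54 and mss 1448, gso_segs is 2,
	 * so real_len = 2950 + 54 * (2 - 1) = 3004, i.e. the headers are
	 * counted once per segment that will appear on the wire.
	 */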

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);

	return txd.raw;
}

static u64
nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 unsigned int pkt_cnt, struct sk_buff *skb, u64 flags)
{
	struct ipv6hdr *ipv6h;
	struct iphdr *iph;

	if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
		return flags;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return flags;

	flags |= NFDK_DESC_TX_L4_CSUM;

	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);

	/* L3 checksum offloading flag is not required for ipv6 */
	if (iph->version == 4) {
		flags |= NFDK_DESC_TX_L3_CSUM;
	} else if (ipv6h->version != 6) {
		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
		return flags;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	if (!skb->encapsulation) {
		r_vec->hw_csum_tx += pkt_cnt;
	} else {
		flags |= NFDK_DESC_TX_ENCAP;
		r_vec->hw_csum_tx_inner += pkt_cnt;
	}
	u64_stats_update_end(&r_vec->tx_sync);

	return flags;
}

static int
nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring,
			      struct sk_buff *skb)
{
	unsigned int n_descs, wr_p, nop_slots;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int nr_frags;
	unsigned int wr_idx;
	int err;

recount_descs:
	n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb));
	nr_frags = skb_shinfo(skb)->nr_frags;
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;
	for (; frag < fend; frag++)
		n_descs += DIV_ROUND_UP(skb_frag_size(frag),
					NFDK_TX_MAX_DATA_PER_DESC);

	if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) {
		if (skb_is_nonlinear(skb)) {
			err = skb_linearize(skb);
			if (err)
				return err;
			goto recount_descs;
		}
		return -EINVAL;
	}

	/* Deliberately under-count by one (the metadata descriptor is not
	 * counted) so that the round-down check below works out.
	 */
	n_descs += !!skb_is_gso(skb);

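	/* Descriptors are consumed in fixed-size blocks; a packet's
	 * descriptors must not straddle a block boundary and a block may
	 * carry at most NFDK_TX_MAX_DATA_PER_BLOCK bytes of payload.
	 * Sketch (assuming 32-descriptor blocks): with wr_p == 30 and
	 * n_descs == 5, round_down(30, 32) != round_down(35, 32), so the
	 * current block is closed by padding it with no-op descriptors.
	 */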
	if (round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	    round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT))
		goto close_block;

	if ((u32)tx_ring->data_pending + skb->len > NFDK_TX_MAX_DATA_PER_BLOCK)
		goto close_block;

	return 0;

close_block:
	wr_p = tx_ring->wr_p;
	nop_slots = D_BLOCK_CPL(wr_p);

	wr_idx = D_IDX(tx_ring, wr_p);
	tx_ring->ktxbufs[wr_idx].skb = NULL;
	txd = &tx_ring->ktxds[wr_idx];

	memset(txd, 0, array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

	tx_ring->data_pending = 0;
	tx_ring->wr_p += nop_slots;
	tx_ring->wr_ptr_add += nop_slots;

	return 0;
}

static int nfp_nfdk_prep_port_id(struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	unsigned char *data;

	if (likely(!md_dst))
		return 0;
	if (unlikely(md_dst->type != METADATA_HW_PORT_MUX))
		return 0;

	/* Note: the case of TSO of an skb with metadata prepended is not
	 * supported. See the comments in `nfp_nfdk_tx_tso` for details.
	 */
	if (unlikely(md_dst && skb_is_gso(skb)))
		return -EOPNOTSUPP;

	if (unlikely(skb_cow_head(skb, sizeof(md_dst->u.port_info.port_id))))
		return -ENOMEM;

	data = skb_push(skb, sizeof(md_dst->u.port_info.port_id));
	put_unaligned_be32(md_dst->u.port_info.port_id, data);

	return sizeof(md_dst->u.port_info.port_id);
}

static int
nfp_nfdk_prep_tx_meta(struct nfp_app *app, struct sk_buff *skb,
		      struct nfp_net_r_vector *r_vec)
{
	unsigned char *data;
	int res, md_bytes;
	u32 meta_id = 0;

	res = nfp_nfdk_prep_port_id(skb);
	if (unlikely(res <= 0))
		return res;

	md_bytes = res;
	meta_id = NFP_NET_META_PORTID;

	if (unlikely(skb_cow_head(skb, sizeof(meta_id))))
		return -ENOMEM;

	md_bytes += sizeof(meta_id);

	meta_id = FIELD_PREP(NFDK_META_LEN, md_bytes) |
		  FIELD_PREP(NFDK_META_FIELDS, meta_id);

	data = skb_push(skb, sizeof(meta_id));
	put_unaligned_be32(meta_id, data);
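	/* The resulting prepend, as built above, is: a 4-byte meta header
	 * word encoding the total metadata length (here 8 bytes) and the
	 * PORTID field type, immediately followed by the 4-byte port id.
	 */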

	return NFDK_DESC_TX_CHAIN_META;
}

/**
 * nfp_nfdk_tx() - Main transmit entry point
 * @skb: SKB to transmit
 * @netdev: netdev structure
 *
 * Return: NETDEV_TX_OK on success.
 */
netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	struct nfp_nfdk_tx_buf *txbuf, *etxbuf;
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_net_r_vector *r_vec;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int real_len, qidx;
	unsigned int dma_len, type;
	struct netdev_queue *nd_q;
	struct nfp_net_dp *dp;
	int nr_frags, wr_idx;
	dma_addr_t dma_addr;
	u64 metadata;

	dp = &nn->dp;
	qidx = skb_get_queue_mapping(skb);
	tx_ring = &dp->tx_rings[qidx];
	r_vec = tx_ring->r_vec;
	nd_q = netdev_get_tx_queue(dp->netdev, qidx);

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
			   qidx, tx_ring->wr_p, tx_ring->rd_p);
		netif_tx_stop_queue(nd_q);
		nfp_net_tx_xmit_more_flush(tx_ring);
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		return NETDEV_TX_BUSY;
	}

	metadata = nfp_nfdk_prep_tx_meta(nn->app, skb, r_vec);
	if (unlikely((int)metadata < 0))
		goto err_flush;

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
		goto err_flush;

	/* nr_frags can change after skb_linearize(), so only read it once
	 * nfp_nfdk_tx_maybe_close_block() has run.
	 */
	nr_frags = skb_shinfo(skb)->nr_frags;
	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (skb_is_gso(skb))
		type = NFDK_DESC_TX_TYPE_TSO;
	else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;

	/* Pack as much data as possible into each descriptor, but make sure
	 * the first descriptor covers the whole head, as the firmware
	 * requires it. Note that dma_len ANDed with
	 * NFDK_DESC_TX_DMA_LEN_HEAD can be less than the headlen.
	 */
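	/* Worked sketch (field width assumed, see NFDK_DESC_TX_DMA_LEN_HEAD
	 * in nfdk.h): lengths are stored minus one, so a 4096-byte head is
	 * encoded as 4095; whatever does not fit in the head descriptor's
	 * length field is carried by the gather descriptors emitted below,
	 * which is why dma_addr advances by tmp_dlen + 1 each step.
	 */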
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

	/* starts at bit 0 */
	BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1));

	/* Preserve the original dlen_type, this way below the EOP logic
	 * can use dlen_type.
	 */
	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	/* The rest of the data (if any) will be in larger DMA descriptors
	 * and is handled with the fragment loop.
	 */
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;

	while (true) {
		while (dma_len > 0) {
			dma_len -= 1;
			dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);

			txd->dma_len_type = cpu_to_le16(dlen_type);
			nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

			dma_len -= dlen_type;
			dma_addr += dlen_type + 1;
			txd++;
		}

		if (frag >= fend)
			break;

		dma_len = skb_frag_size(frag);
		dma_addr = skb_frag_dma_map(dp->dev, frag, 0, dma_len,
					    DMA_TO_DEVICE);
		if (dma_mapping_error(dp->dev, dma_addr))
			goto err_unmap;

		txbuf->dma_addr = dma_addr;
		txbuf++;

		frag++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	if (!skb_is_gso(skb)) {
		real_len = skb->len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd++;
	} else {
		/* lso desc should be placed after metadata desc */
		(txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb);
		real_len = txbuf->real_len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd += 2;
		txbuf++;
	}
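	/* Descriptor layout for this packet, as emitted above: the head
	 * descriptor, any gather descriptors, then the metadata descriptor,
	 * and for TSO one extra LSO descriptor right after the metadata.
	 */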

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	skb_tx_timestamp(skb);

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	if (nfp_nfdk_tx_ring_should_stop(tx_ring))
		nfp_nfdk_tx_ring_stop(nd_q, tx_ring);

	tx_ring->wr_ptr_add += cnt;
	if (__netdev_tx_sent_queue(nd_q, real_len, netdev_xmit_more()))
		nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), nr_frags, cnt);
	if (skb_is_gso(skb))
		txbuf--;
err_unmap:
	/* txbuf points at the next-to-use entry */
	etxbuf = txbuf;
	/* first txbuf holds the skb */
	txbuf = &tx_ring->ktxbufs[wr_idx + 1];
	if (txbuf < etxbuf) {
		dma_unmap_single(dp->dev, txbuf->dma_addr,
				 skb_headlen(skb), DMA_TO_DEVICE);
		txbuf->raw = 0;
		txbuf++;
	}
	frag = skb_shinfo(skb)->frags;
	while (txbuf < etxbuf) {
		dma_unmap_page(dp->dev, txbuf->dma_addr,
			       skb_frag_size(frag), DMA_TO_DEVICE);
		txbuf->raw = 0;
		frag++;
		txbuf++;
	}
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
	nfp_net_tx_xmit_more_flush(tx_ring);
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

/**
 * nfp_nfdk_tx_complete() - Handle completed TX packets
 * @tx_ring: TX ring structure
 * @budget: NAPI budget (only used as bool to determine if in NAPI context)
 */
static void nfp_nfdk_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	u32 done_pkts = 0, done_bytes = 0;
	struct nfp_nfdk_tx_buf *ktxbufs;
	struct device *dev = dp->dev;
	struct netdev_queue *nd_q;
	u32 rd_p, qcp_rd_p;
	int todo;

	rd_p = tx_ring->rd_p;
	if (tx_ring->wr_p == rd_p)
		return;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);

	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
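	/* qcp_rd_p is read from the hardware queue controller; the unsigned
	 * subtraction handles wrap naturally and D_IDX() masks the result
	 * to the ring size, yielding the number of newly completed slots.
	 */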
	ktxbufs = tx_ring->ktxbufs;

	while (todo > 0) {
		const skb_frag_t *frag, *fend;
		unsigned int size, n_descs = 1;
		struct nfp_nfdk_tx_buf *txbuf;
		struct sk_buff *skb;

		txbuf = &ktxbufs[D_IDX(tx_ring, rd_p)];
		skb = txbuf->skb;
		txbuf++;

		/* Closed block */
		if (!skb) {
			n_descs = D_BLOCK_CPL(rd_p);
			goto next;
		}

		/* Unmap head */
		size = skb_headlen(skb);
		n_descs += nfp_nfdk_headlen_to_segs(size);
		dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE);
		txbuf++;

		/* Unmap frags */
		frag = skb_shinfo(skb)->frags;
		fend = frag + skb_shinfo(skb)->nr_frags;
		for (; frag < fend; frag++) {
			size = skb_frag_size(frag);
			n_descs += DIV_ROUND_UP(size,
						NFDK_TX_MAX_DATA_PER_DESC);
			dma_unmap_page(dev, txbuf->dma_addr,
				       skb_frag_size(frag), DMA_TO_DEVICE);
			txbuf++;
		}

		if (!skb_is_gso(skb)) {
			done_bytes += skb->len;
			done_pkts++;
		} else {
			done_bytes += txbuf->real_len;
			done_pkts += txbuf->pkt_cnt;
			n_descs++;
		}
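		/* For GSO the stats come from the TX buffer bookkeeping
		 * filled in by nfp_nfdk_tx_tso(), and the extra n_descs++
		 * accounts for the LSO descriptor that follows the
		 * metadata descriptor.
		 */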

		napi_consume_skb(skb, budget);
next:
		rd_p += n_descs;
		todo -= n_descs;
	}

	tx_ring->rd_p = rd_p;
	tx_ring->qcp_rd_p = qcp_rd_p;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	if (!dp->netdev)
		return;

	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
	netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
	if (nfp_nfdk_tx_ring_should_wake(tx_ring)) {
		/* Make sure TX thread will see updated tx_ring->rd_p */
		smp_mb();

		if (unlikely(netif_tx_queue_stopped(nd_q)))
			netif_tx_wake_queue(nd_q);
	}

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}

/* Receive processing */
static void *
nfp_nfdk_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *frag;

	if (!dp->xdp_prog) {
		frag = napi_alloc_frag(dp->fl_bufsz);
		if (unlikely(!frag))
			return NULL;
	} else {
		struct page *page;

		page = dev_alloc_page();
		if (unlikely(!page))
			return NULL;
		frag = page_address(page);
	}

	*dma_addr = nfp_net_dma_map_rx(dp, frag);
	if (dma_mapping_error(dp->dev, *dma_addr)) {
		nfp_net_free_frag(frag, dp->xdp_prog);
		nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
		return NULL;
	}

	return frag;
}

/**
 * nfp_nfdk_rx_give_one() - Put mapped skb on the software and hardware rings
 * @dp: NFP Net data path struct
 * @rx_ring: RX ring structure
 * @frag: page fragment buffer
 * @dma_addr: DMA address of skb mapping
 */
static void
nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp,
		     struct nfp_net_rx_ring *rx_ring,
		     void *frag, dma_addr_t dma_addr)
{
	unsigned int wr_idx;

	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);

	nfp_net_dma_sync_dev_rx(dp, dma_addr);

	/* Stash SKB and DMA address away */
	rx_ring->rxbufs[wr_idx].frag = frag;
	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;

	/* Fill freelist descriptor */
	rx_ring->rxds[wr_idx].fld.reserved = 0;
	rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
	nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
			      dma_addr + dp->rx_dma_off);

	rx_ring->wr_p++;
	if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
		/* Update write pointer of the freelist queue. Make
		 * sure all writes are flushed before telling the hardware.
		 */
		wmb();
		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
	}
}

/**
 * nfp_nfdk_rx_ring_fill_freelist() - Give buffers from the ring to FW
 * @dp: NFP Net data path struct
 * @rx_ring: RX ring to fill
 */
void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp,
				    struct nfp_net_rx_ring *rx_ring)
{
	unsigned int i;

	for (i = 0; i < rx_ring->cnt - 1; i++)
		nfp_nfdk_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
				     rx_ring->rxbufs[i].dma_addr);
}

/**
 * nfp_nfdk_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags: RX descriptor flags field in CPU byte order
 */
static int nfp_nfdk_rx_csum_has_errors(u16 flags)
{
	u16 csum_all_checked, csum_all_ok;

	csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
	csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;

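	/* The "checked" bits and the "ok" bits live at different positions
	 * in the flags word; shifting the "ok" bits by
	 * PCIE_DESC_RX_CSUM_OK_SHIFT lines them up, so any checksum that
	 * was checked but not reported OK makes the comparison fail.
	 */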
	return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
}

/**
 * nfp_nfdk_rx_csum() - set SKB checksum field based on RX descriptor flags
 * @dp: NFP Net data path struct
 * @r_vec: per-ring structure
 * @rxd: Pointer to RX descriptor
 * @meta: Parsed metadata prepend
 * @skb: Pointer to SKB
 */
static void
nfp_nfdk_rx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_desc *rxd, struct nfp_meta_parsed *meta,
		 struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	if (!(dp->netdev->features & NETIF_F_RXCSUM))
		return;

	if (meta->csum_type) {
		skb->ip_summed = meta->csum_type;
		skb->csum = meta->csum;
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_complete++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	if (nfp_nfdk_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_error++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	/* Assume that the firmware will never report inner CSUM_OK unless outer
	 * L4 headers were successfully parsed. FW will always report zero UDP
	 * checksum as CSUM_OK.
	 */
	if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}

	if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_inner_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}
}

static void
nfp_nfdk_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
		  unsigned int type, __be32 *hash)
{
	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	switch (type) {
	case NFP_NET_RSS_IPV4:
	case NFP_NET_RSS_IPV6:
	case NFP_NET_RSS_IPV6_EX:
		meta->hash_type = PKT_HASH_TYPE_L3;
		break;
	default:
		meta->hash_type = PKT_HASH_TYPE_L4;
		break;
	}

	meta->hash = get_unaligned_be32(hash);
}

static bool
nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
		    void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
	u32 meta_info;

	meta_info = get_unaligned_be32(data);
	data += 4;

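	/* The first metadata word is a descriptor of what follows: field
	 * type codes are packed into it, NFP_NET_META_FIELD_SIZE bits per
	 * field, lowest field first, and each consumed field shifts the
	 * word right until it reaches zero.
	 */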
	while (meta_info) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_HASH:
			meta_info >>= NFP_NET_META_FIELD_SIZE;
			nfp_nfdk_set_hash(netdev, meta,
					  meta_info & NFP_NET_META_FIELD_MASK,
					  (__be32 *)data);
			data += 4;
			break;
		case NFP_NET_META_MARK:
			meta->mark = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_PORTID:
			meta->portid = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_CSUM:
			meta->csum_type = CHECKSUM_COMPLETE;
			meta->csum =
				(__force __wsum)__get_unaligned_cpu32(data);
			data += 4;
			break;
		case NFP_NET_META_RESYNC_INFO:
			if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
						      pkt_len))
				return false;
			data += sizeof(struct nfp_net_tls_resync_req);
			break;
		default:
			return true;
		}

		meta_info >>= NFP_NET_META_FIELD_SIZE;
	}

	return data != pkt;
}

static void
nfp_nfdk_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
		 struct sk_buff *skb)
{
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_drops++;
	/* If we have both skb and rxbuf the replacement buffer allocation
	 * must have failed, count this as an alloc failure.
	 */
	if (skb && rxbuf)
		r_vec->rx_replace_buf_alloc_fail++;
	u64_stats_update_end(&r_vec->rx_sync);

	/* The skb is built around the frag; freeing the skb would free the
	 * frag, so take an extra reference to be able to reuse it.
	 */
	if (skb && rxbuf && skb->head == rxbuf->frag)
		page_ref_inc(virt_to_head_page(rxbuf->frag));
	if (rxbuf)
		nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
	if (skb)
		dev_kfree_skb_any(skb);
}

static bool nfp_nfdk_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_rx_ring *rx_ring;
	u32 qcp_rd_p, done = 0;
	bool done_all;
	int todo;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return true;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

	done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
	todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);

	rx_ring = r_vec->rx_ring;
	while (todo > 0) {
		int idx = D_IDX(tx_ring, tx_ring->rd_p + done);
		struct nfp_nfdk_tx_buf *txbuf;
		unsigned int step = 1;

		txbuf = &tx_ring->ktxbufs[idx];
		if (!txbuf->raw)
			goto next;

		if (NFDK_TX_BUF_INFO(txbuf->val) != NFDK_TX_BUF_INFO_SOP) {
			WARN_ONCE(1, "Unexpected TX buffer in XDP TX ring\n");
			goto next;
		}

		/* Two successive txbufs are used to stash virtual and dma
		 * address respectively, recycle and clean them here.
		 */
		nfp_nfdk_rx_give_one(dp, rx_ring,
				     (void *)NFDK_TX_BUF_PTR(txbuf[0].val),
				     txbuf[1].dma_addr);
		txbuf[0].raw = 0;
		txbuf[1].raw = 0;
		step = 2;

		u64_stats_update_begin(&r_vec->tx_sync);
		/* Note: tx_bytes not accumulated. */
		r_vec->tx_pkts++;
		u64_stats_update_end(&r_vec->tx_sync);
next:
		todo -= step;
		done += step;
	}

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + done);
	tx_ring->rd_p += done;

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return done_all;
}

static bool
nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
		    struct nfp_net_tx_ring *tx_ring,
		    struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
		    unsigned int pkt_len, bool *completed)
{
	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
	unsigned int dma_len, type, cnt, dlen_type, tmp_dlen;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int n_descs;
	dma_addr_t dma_addr;
	int wr_idx;

	/* Reject if xdp_adjust_tail grew the packet beyond the DMA area */
	if (pkt_len + dma_off > dma_map_sz)
		return false;

	/* Make sure there's still at least one block available after
	 * aligning to block boundary, so that the txds used below
	 * won't wrap around the tx_ring.
	 */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		if (!*completed) {
			nfp_nfdk_xdp_complete(tx_ring);
			*completed = true;
		}

		if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
			nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
					 NULL);
			return false;
		}
	}

	/* Check if cross block boundary */
	n_descs = nfp_nfdk_headlen_to_segs(pkt_len);
	if ((round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	     round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) ||
	    ((u32)tx_ring->data_pending + pkt_len >
	     NFDK_TX_MAX_DATA_PER_BLOCK)) {
		unsigned int nop_slots = D_BLOCK_CPL(tx_ring->wr_p);

		wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
		txd = &tx_ring->ktxds[wr_idx];
		memset(txd, 0,
		       array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

		tx_ring->data_pending = 0;
		tx_ring->wr_p += nop_slots;
		tx_ring->wr_ptr_add += nop_slots;
	}

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	txbuf = &tx_ring->ktxbufs[wr_idx];

	txbuf[0].val = (unsigned long)rxbuf->frag | NFDK_TX_BUF_INFO_SOP;
	txbuf[1].dma_addr = rxbuf->dma_addr;
	/* Note: pkt len not stored */

	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
				   pkt_len, DMA_BIDIRECTIONAL);

	/* Build TX descriptor */
	txd = &tx_ring->ktxds[wr_idx];
	dma_len = pkt_len;
	dma_addr = rxbuf->dma_addr + dma_off;

	if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = 0;
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += pkt_len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	return true;
}

/**
 * nfp_nfdk_rx() - receive up to @budget packets on @rx_ring
 * @rx_ring: RX ring to receive from
 * @budget: NAPI budget
 *
 * Note, this function is separated out from the napi poll function to
 * more cleanly separate packet receive code from other bookkeeping
 * functions performed in the napi poll function.
 *
 * Return: Number of packets received.
 */
static int nfp_nfdk_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_tx_ring *tx_ring;
	struct bpf_prog *xdp_prog;
	bool xdp_tx_cmpl = false;
	unsigned int true_bufsz;
	struct sk_buff *skb;
	int pkts_polled = 0;
	struct xdp_buff xdp;
	int idx;

	xdp_prog = READ_ONCE(dp->xdp_prog);
	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
	xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
		      &rx_ring->xdp_rxq);
	tx_ring = r_vec->xdp_ring;

	while (pkts_polled < budget) {
		unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
		struct nfp_net_rx_buf *rxbuf;
		struct nfp_net_rx_desc *rxd;
		struct nfp_meta_parsed meta;
		bool redir_egress = false;
		struct net_device *netdev;
		dma_addr_t new_dma_addr;
		u32 meta_len_xdp = 0;
		void *new_frag;

		idx = D_IDX(rx_ring, rx_ring->rd_p);

		rxd = &rx_ring->rxds[idx];
		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
			break;

		/* Memory barrier to ensure that we won't do other reads
		 * before the DD bit.
		 */
		dma_rmb();

		memset(&meta, 0, sizeof(meta));

		rx_ring->rd_p++;
		pkts_polled++;

		rxbuf = &rx_ring->rxbufs[idx];
		/*         < meta_len >
		 *  <-- [rx_offset] -->
		 *  ---------------------------------------------------------
		 * | [XX] |  metadata  |            packet           | XXXX |
		 *  ---------------------------------------------------------
		 *         <---------------- data_len --------------->
		 *
		 * The rx_offset is fixed for all packets, the meta_len can vary
		 * on a packet by packet basis. If rx_offset is set to zero
		 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
		 * buffer and is immediately followed by the packet (no [XX]).
		 */
		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
		data_len = le16_to_cpu(rxd->rxd.data_len);
		pkt_len = data_len - meta_len;

		pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
		if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
			pkt_off += meta_len;
		else
			pkt_off += dp->rx_offset;
		meta_off = pkt_off - meta_len;
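		/* pkt_off is where the packet data starts within the frag:
		 * buffer headroom plus the DMA offset, plus either the
		 * per-packet metadata length (dynamic rx_offset) or the
		 * fixed rx_offset. meta_off then backs up over the
		 * prepended metadata.
		 */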

		/* Stats update */
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->rx_pkts++;
		r_vec->rx_bytes += pkt_len;
		u64_stats_update_end(&r_vec->rx_sync);

		if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
			     (dp->rx_offset && meta_len > dp->rx_offset))) {
			nn_dp_warn(dp, "oversized RX packet metadata %u\n",
				   meta_len);
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}

		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
					data_len);

		if (meta_len) {
			if (unlikely(nfp_nfdk_parse_meta(dp->netdev, &meta,
							 rxbuf->frag + meta_off,
							 rxbuf->frag + pkt_off,
							 pkt_len, meta_len))) {
				nn_dp_warn(dp, "invalid RX packet metadata\n");
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}
		}

		if (xdp_prog && !meta.portid) {
			void *orig_data = rxbuf->frag + pkt_off;
			unsigned int dma_off;
			int act;

			xdp_prepare_buff(&xdp,
					 rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
					 pkt_off - NFP_NET_RX_BUF_HEADROOM,
					 pkt_len, true);

			act = bpf_prog_run_xdp(xdp_prog, &xdp);

			pkt_len = xdp.data_end - xdp.data;
			pkt_off += xdp.data - orig_data;

			switch (act) {
			case XDP_PASS:
				meta_len_xdp = xdp.data - xdp.data_meta;
				break;
			case XDP_TX:
				dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
				if (unlikely(!nfp_nfdk_tx_xdp_buf(dp, rx_ring,
								  tx_ring,
								  rxbuf,
								  dma_off,
								  pkt_len,
								  &xdp_tx_cmpl)))
					trace_xdp_exception(dp->netdev,
							    xdp_prog, act);
				continue;
			default:
				bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_ABORTED:
				trace_xdp_exception(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_DROP:
				nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
						     rxbuf->dma_addr);
				continue;
			}
		}
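		/* XDP_TX hands the RX frag straight to the XDP TX ring; the
		 * buffer is returned to the RX freelist later, when
		 * nfp_nfdk_xdp_complete() sees its completion.
		 */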

		if (likely(!meta.portid)) {
			netdev = dp->netdev;
		} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
			struct nfp_net *nn = netdev_priv(dp->netdev);

			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
					    pkt_len);
			nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
					     rxbuf->dma_addr);
			continue;
		} else {
			struct nfp_net *nn;

			nn = netdev_priv(dp->netdev);
			netdev = nfp_app_dev_get(nn->app, meta.portid,
						 &redir_egress);
			if (unlikely(!netdev)) {
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}

			if (nfp_netdev_is_nfp_repr(netdev))
				nfp_repr_inc_rx_stats(netdev, pkt_len);
		}

		skb = build_skb(rxbuf->frag, true_bufsz);
		if (unlikely(!skb)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}
		new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
		if (unlikely(!new_frag)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
			continue;
		}

		nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

		nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

		skb_reserve(skb, pkt_off);
		skb_put(skb, pkt_len);

		skb->mark = meta.mark;
		skb_set_hash(skb, meta.hash, meta.hash_type);

		skb_record_rx_queue(skb, rx_ring->idx);
		skb->protocol = eth_type_trans(skb, netdev);

		nfp_nfdk_rx_csum(dp, r_vec, rxd, &meta, skb);

		if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       le16_to_cpu(rxd->rxd.vlan));
		if (meta_len_xdp)
			skb_metadata_set(skb, meta_len_xdp);

		if (likely(!redir_egress)) {
			napi_gro_receive(&rx_ring->r_vec->napi, skb);
		} else {
			skb->dev = netdev;
			skb_reset_network_header(skb);
			__skb_push(skb, ETH_HLEN);
			dev_queue_xmit(skb);
		}
	}

	if (xdp_prog) {
		if (tx_ring->wr_ptr_add)
			nfp_net_tx_xmit_more_flush(tx_ring);
		else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
			 !xdp_tx_cmpl)
			if (!nfp_nfdk_xdp_complete(tx_ring))
				pkts_polled = budget;
	}

	return pkts_polled;
}

/**
 * nfp_nfdk_poll() - napi poll function
 * @napi: NAPI structure
 * @budget: NAPI budget
 *
 * Return: number of packets polled.
 */
int nfp_nfdk_poll(struct napi_struct *napi, int budget)
{
	struct nfp_net_r_vector *r_vec =
		container_of(napi, struct nfp_net_r_vector, napi);
	unsigned int pkts_polled = 0;

	if (r_vec->tx_ring)
		nfp_nfdk_tx_complete(r_vec->tx_ring, budget);
	if (r_vec->rx_ring)
		pkts_polled = nfp_nfdk_rx(r_vec->rx_ring, budget);

	if (pkts_polled < budget)
		if (napi_complete_done(napi, pkts_polled))
			nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
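	/* Reporting fewer packets than the budget signals NAPI that the
	 * ring is drained; only once napi_complete_done() accepts that is
	 * the device interrupt unmasked again.
	 */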

	if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->rx_sync);
			pkts = r_vec->rx_pkts;
			bytes = r_vec->rx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->rx_dim, dim_sample);
	}

	if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->tx_sync);
			pkts = r_vec->tx_pkts;
			bytes = r_vec->tx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->tx_dim, dim_sample);
	}

	return pkts_polled;
}

/* Control device data path
 */

bool
nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
		     struct sk_buff *skb, bool old)
{
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int dma_len, type;
	struct nfp_net_dp *dp;
	dma_addr_t dma_addr;
	u64 metadata = 0;
	int wr_idx;

	dp = &r_vec->nfp_net->dp;
	tx_ring = r_vec->tx_ring;

	if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
		nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
		goto err_free;
	}

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		if (!old)
			__skb_queue_tail(&r_vec->queue, skb);
		else
			__skb_queue_head(&r_vec->queue, skb);
		return NETDEV_TX_BUSY;
	}

	if (nfp_app_ctrl_has_meta(nn->app)) {
		if (unlikely(skb_headroom(skb) < 8)) {
			nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
			goto err_free;
		}
		metadata = NFDK_DESC_TX_CHAIN_META;
		put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
		put_unaligned_be32(FIELD_PREP(NFDK_META_LEN, 8) |
				   FIELD_PREP(NFDK_META_FIELDS,
					      NFP_NET_META_PORTID),
				   skb_push(skb, 4));
	}
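	/* This mirrors nfp_nfdk_prep_tx_meta(): an 8-byte prepend made of
	 * the meta header word (length 8, PORTID field) followed by the
	 * control port id, which nfp_ctrl_meta_ok() checks on the RX side.
	 */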

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
		goto err_free;

	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = cpu_to_le64(metadata);
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), 0, cnt);
	txbuf--;
	dma_unmap_single(dp->dev, txbuf->dma_addr,
			 skb_headlen(skb), DMA_TO_DEVICE);
	txbuf->raw = 0;
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_free:
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&r_vec->queue)))
		if (nfp_nfdk_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
			return;
}

static bool
nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
{
	u32 meta_type, meta_tag;

	if (!nfp_app_ctrl_has_meta(nn->app))
		return !meta_len;

	if (meta_len != 8)
		return false;

	meta_type = get_unaligned_be32(data);
	meta_tag = get_unaligned_be32(data + 4);

	return (meta_type == NFP_NET_META_PORTID &&
		meta_tag == NFP_META_PORT_ID_CTRL);
}

static bool
nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
		struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
{
	unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
	struct nfp_net_rx_buf *rxbuf;
	struct nfp_net_rx_desc *rxd;
	dma_addr_t new_dma_addr;
	struct sk_buff *skb;
	void *new_frag;
	int idx;

	idx = D_IDX(rx_ring, rx_ring->rd_p);

	rxd = &rx_ring->rxds[idx];
	if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
		return false;

	/* Memory barrier to ensure that we won't do other reads
	 * before the DD bit.
	 */
	dma_rmb();

	rx_ring->rd_p++;

	rxbuf = &rx_ring->rxbufs[idx];
	meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
	data_len = le16_to_cpu(rxd->rxd.data_len);
	pkt_len = data_len - meta_len;

	pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
	if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
		pkt_off += meta_len;
	else
		pkt_off += dp->rx_offset;
	meta_off = pkt_off - meta_len;

	/* Stats update */
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_pkts++;
	r_vec->rx_bytes += pkt_len;
	u64_stats_update_end(&r_vec->rx_sync);

	nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);

	if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
		nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
			   meta_len);
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}

	skb = build_skb(rxbuf->frag, dp->fl_bufsz);
	if (unlikely(!skb)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}
	new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
	if (unlikely(!new_frag)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
		return true;
	}

	nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

	nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

	skb_reserve(skb, pkt_off);
	skb_put(skb, pkt_len);

	nfp_app_ctrl_rx(nn->app, skb);

	return true;
}

static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
{
	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
	struct nfp_net *nn = r_vec->nfp_net;
	struct nfp_net_dp *dp = &nn->dp;
	unsigned int budget = 512;

	while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
		continue;

	return budget;
}

void nfp_nfdk_ctrl_poll(struct tasklet_struct *t)
{
	struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);

	spin_lock(&r_vec->lock);
	nfp_nfdk_tx_complete(r_vec->tx_ring, 0);
	__nfp_ctrl_tx_queued(r_vec);
	spin_unlock(&r_vec->lock);

	if (nfp_ctrl_rx(r_vec)) {
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
	} else {
		tasklet_schedule(&r_vec->tasklet);
		nn_dp_warn(&r_vec->nfp_net->dp,
			   "control message budget exceeded!\n");
	}
}