1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
2
#include <linux/bpf_trace.h>
#include <linux/dma-mapping.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/irq.h>
#include <linux/overflow.h>
#include <linux/pci.h>
#include <linux/skbuff.h>
#include "funeth_txrx.h"
#include "funeth.h"
#include "fun_queue.h"
13
14 #define CREATE_TRACE_POINTS
15 #include "funeth_trace.h"
16
/* With pages of at least 4KB and the device's maximum supported MTU, a
 * received packet is scattered into no more than 4 buffers.
 */
#define RX_MAX_FRAGS 4

/* Headroom placed in front of each packet when no XDP program is in use.
 * It is present only for packets that occupy a single fragment.
 */
#define FUN_RX_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)

/* Pages backing Rx buffers are reused where possible. Rather than touching
 * the page reference count for every packet, EXTRA_PAGE_REFS references are
 * taken in one go and handed out one per packet placed in the buffer. A new
 * batch is taken once fewer than MIN_PAGE_REFS of them remain.
 */
#define EXTRA_PAGE_REFS 1000000
#define MIN_PAGE_REFS 1000

/* Bits accumulated in an Rx queue's xdp_flush field while processing CQEs. */
enum {
	FUN_XDP_FLUSH_REDIR = 1 << 0,	/* xdp_do_flush() is pending */
	FUN_XDP_FLUSH_TX = 1 << 1,	/* XDP Tx queue doorbell is pending */
};
36
37 /* See if a page is running low on refs we are holding and if so take more. */
refresh_refs(struct funeth_rxbuf * buf)38 static void refresh_refs(struct funeth_rxbuf *buf)
39 {
40 if (unlikely(buf->pg_refs < MIN_PAGE_REFS)) {
41 buf->pg_refs += EXTRA_PAGE_REFS;
42 page_ref_add(buf->page, EXTRA_PAGE_REFS);
43 }
44 }
45
46 /* Offer a buffer to the Rx buffer cache. The cache will hold the buffer if its
47 * page is worth retaining and there's room for it. Otherwise the page is
48 * unmapped and our references released.
49 */
cache_offer(struct funeth_rxq * q,const struct funeth_rxbuf * buf)50 static void cache_offer(struct funeth_rxq *q, const struct funeth_rxbuf *buf)
51 {
52 struct funeth_rx_cache *c = &q->cache;
53
54 if (c->prod_cnt - c->cons_cnt <= c->mask && buf->node == numa_mem_id()) {
55 c->bufs[c->prod_cnt & c->mask] = *buf;
56 c->prod_cnt++;
57 } else {
58 dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
59 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
60 __page_frag_cache_drain(buf->page, buf->pg_refs);
61 }
62 }
63
64 /* Get a page from the Rx buffer cache. We only consider the next available
65 * page and return it if we own all its references.
66 */
cache_get(struct funeth_rxq * q,struct funeth_rxbuf * rb)67 static bool cache_get(struct funeth_rxq *q, struct funeth_rxbuf *rb)
68 {
69 struct funeth_rx_cache *c = &q->cache;
70 struct funeth_rxbuf *buf;
71
72 if (c->prod_cnt == c->cons_cnt)
73 return false; /* empty cache */
74
75 buf = &c->bufs[c->cons_cnt & c->mask];
76 if (page_ref_count(buf->page) == buf->pg_refs) {
77 dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
78 PAGE_SIZE, DMA_FROM_DEVICE);
79 *rb = *buf;
80 buf->page = NULL;
81 refresh_refs(rb);
82 c->cons_cnt++;
83 return true;
84 }
85
86 /* Page can't be reused. If the cache is full drop this page. */
87 if (c->prod_cnt - c->cons_cnt > c->mask) {
88 dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
89 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
90 __page_frag_cache_drain(buf->page, buf->pg_refs);
91 buf->page = NULL;
92 c->cons_cnt++;
93 }
94 return false;
95 }
96
97 /* Allocate and DMA-map a page for receive. */
funeth_alloc_page(struct funeth_rxq * q,struct funeth_rxbuf * rb,int node,gfp_t gfp)98 static int funeth_alloc_page(struct funeth_rxq *q, struct funeth_rxbuf *rb,
99 int node, gfp_t gfp)
100 {
101 struct page *p;
102
103 if (cache_get(q, rb))
104 return 0;
105
106 p = __alloc_pages_node(node, gfp | __GFP_NOWARN, 0);
107 if (unlikely(!p))
108 return -ENOMEM;
109
110 rb->dma_addr = dma_map_page(q->dma_dev, p, 0, PAGE_SIZE,
111 DMA_FROM_DEVICE);
112 if (unlikely(dma_mapping_error(q->dma_dev, rb->dma_addr))) {
113 FUN_QSTAT_INC(q, rx_map_err);
114 __free_page(p);
115 return -ENOMEM;
116 }
117
118 FUN_QSTAT_INC(q, rx_page_alloc);
119
120 rb->page = p;
121 rb->pg_refs = 1;
122 refresh_refs(rb);
123 rb->node = page_is_pfmemalloc(p) ? -1 : page_to_nid(p);
124 return 0;
125 }
126
funeth_free_page(struct funeth_rxq * q,struct funeth_rxbuf * rb)127 static void funeth_free_page(struct funeth_rxq *q, struct funeth_rxbuf *rb)
128 {
129 if (rb->page) {
130 dma_unmap_page(q->dma_dev, rb->dma_addr, PAGE_SIZE,
131 DMA_FROM_DEVICE);
132 __page_frag_cache_drain(rb->page, rb->pg_refs);
133 rb->page = NULL;
134 }
135 }
136
/* Run the XDP program assigned to an Rx queue.
 * Return %NULL if the buffer is consumed, or the virtual address of the packet
 * to turn into an skb.
 *
 * @q: Rx queue the packet arrived on
 * @frags: the packet's first (and only examined) fragment; on XDP_PASS its
 *         offset and size are rewritten to cover just the packet data
 * @buf_va: virtual address of the fragment, including the XDP headroom
 * @ref_ok: zero if the page may not be handed to anyone else; XDP_TX and
 *          XDP_REDIRECT are then not attempted and the packet is returned to
 *          the caller instead
 * @xdp_q: Tx queue used for XDP_TX
 */
static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
			 int ref_ok, struct funeth_txq *xdp_q)
{
	struct bpf_prog *xdp_prog;
	struct xdp_frame *xdpf;
	struct xdp_buff xdp;
	u32 act;

	/* VA includes the headroom, frag size includes headroom + tailroom */
	xdp_init_buff(&xdp, ALIGN(skb_frag_size(frags), FUN_EPRQ_PKT_ALIGN),
		      &q->xdp_rxq);
	xdp_prepare_buff(&xdp, buf_va, FUN_XDP_HEADROOM, skb_frag_size(frags) -
			 (FUN_RX_TAILROOM + FUN_XDP_HEADROOM), false);

	xdp_prog = READ_ONCE(q->xdp_prog);
	act = bpf_prog_run_xdp(xdp_prog, &xdp);

	switch (act) {
	case XDP_PASS:
		/* remove headroom, which may not be FUN_XDP_HEADROOM now */
		skb_frag_size_set(frags, xdp.data_end - xdp.data);
		skb_frag_off_add(frags, xdp.data - xdp.data_hard_start);
		goto pass;
	case XDP_TX:
		/* without a usable page reference the caller handles it */
		if (unlikely(!ref_ok))
			goto pass;

		xdpf = xdp_convert_buff_to_frame(&xdp);
		if (!xdpf || !fun_xdp_tx(xdp_q, xdpf))
			goto xdp_error;
		FUN_QSTAT_INC(q, xdp_tx);
		/* the Tx doorbell is written once per poll, see xdp_flush */
		q->xdp_flush |= FUN_XDP_FLUSH_TX;
		break;
	case XDP_REDIRECT:
		/* without a usable page reference the caller handles it */
		if (unlikely(!ref_ok))
			goto pass;
		if (unlikely(xdp_do_redirect(q->netdev, &xdp, xdp_prog)))
			goto xdp_error;
		FUN_QSTAT_INC(q, xdp_redir);
		q->xdp_flush |= FUN_XDP_FLUSH_REDIR;
		break;
	default:
		bpf_warn_invalid_xdp_action(q->netdev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(q->netdev, xdp_prog, act);
		/* also the landing point for failed XDP_TX/XDP_REDIRECT */
xdp_error:
		q->cur_buf->pg_refs++; /* return frags' page reference */
		FUN_QSTAT_INC(q, xdp_err);
		break;
	case XDP_DROP:
		/* the packet goes nowhere, take back its page reference */
		q->cur_buf->pg_refs++;
		FUN_QSTAT_INC(q, xdp_drops);
		break;
	}
	/* every path that leaves the switch normally consumed the buffer */
	return NULL;

pass:
	return xdp.data;
}
201
202 /* A CQE contains a fixed completion structure along with optional metadata and
203 * even packet data. Given the start address of a CQE return the start of the
204 * contained fixed structure, which lies at the end.
205 */
cqe_to_info(const void * cqe)206 static const void *cqe_to_info(const void *cqe)
207 {
208 return cqe + FUNETH_CQE_INFO_OFFSET;
209 }
210
211 /* The inverse of cqe_to_info(). */
info_to_cqe(const void * cqe_info)212 static const void *info_to_cqe(const void *cqe_info)
213 {
214 return cqe_info - FUNETH_CQE_INFO_OFFSET;
215 }
216
217 /* Return the type of hash provided by the device based on the L3 and L4
218 * protocols it parsed for the packet.
219 */
cqe_to_pkt_hash_type(u16 pkt_parse)220 static enum pkt_hash_types cqe_to_pkt_hash_type(u16 pkt_parse)
221 {
222 static const enum pkt_hash_types htype_map[] = {
223 PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
224 PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L4,
225 PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
226 PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3
227 };
228 u16 key;
229
230 /* Build the key from the TCP/UDP and IP/IPv6 bits */
231 key = ((pkt_parse >> FUN_ETH_RX_CV_OL4_PROT_S) & 6) |
232 ((pkt_parse >> (FUN_ETH_RX_CV_OL3_PROT_S + 1)) & 1);
233
234 return htype_map[key];
235 }
236
237 /* Each received packet can be scattered across several Rx buffers or can
238 * share a buffer with previously received packets depending on the buffer
239 * and packet sizes and the room available in the most recently used buffer.
240 *
241 * The rules are:
242 * - If the buffer at the head of an RQ has not been used it gets (part of) the
243 * next incoming packet.
244 * - Otherwise, if the packet fully fits in the buffer's remaining space the
245 * packet is written there.
246 * - Otherwise, the packet goes into the next Rx buffer.
247 *
248 * This function returns the Rx buffer for a packet or fragment thereof of the
249 * given length. If it isn't @buf it either recycles or frees that buffer
250 * before advancing the queue to the next buffer.
251 *
252 * If called repeatedly with the remaining length of a packet it will walk
253 * through all the buffers containing the packet.
254 */
255 static struct funeth_rxbuf *
get_buf(struct funeth_rxq * q,struct funeth_rxbuf * buf,unsigned int len)256 get_buf(struct funeth_rxq *q, struct funeth_rxbuf *buf, unsigned int len)
257 {
258 if (q->buf_offset + len <= PAGE_SIZE || !q->buf_offset)
259 return buf; /* @buf holds (part of) the packet */
260
261 /* The packet occupies part of the next buffer. Move there after
262 * replenishing the current buffer slot either with the spare page or
263 * by reusing the slot's existing page. Note that if a spare page isn't
264 * available and the current packet occupies @buf it is a multi-frag
265 * packet that will be dropped leaving @buf available for reuse.
266 */
267 if ((page_ref_count(buf->page) == buf->pg_refs &&
268 buf->node == numa_mem_id()) || !q->spare_buf.page) {
269 dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
270 PAGE_SIZE, DMA_FROM_DEVICE);
271 refresh_refs(buf);
272 } else {
273 cache_offer(q, buf);
274 *buf = q->spare_buf;
275 q->spare_buf.page = NULL;
276 q->rqes[q->rq_cons & q->rq_mask] =
277 FUN_EPRQ_RQBUF_INIT(buf->dma_addr);
278 }
279 q->buf_offset = 0;
280 q->rq_cons++;
281 return &q->bufs[q->rq_cons & q->rq_mask];
282 }
283
284 /* Gather the page fragments making up the first Rx packet on @q. Its total
285 * length @tot_len includes optional head- and tail-rooms.
286 *
287 * Return 0 if the device retains ownership of at least some of the pages.
288 * In this case the caller may only copy the packet.
289 *
290 * A non-zero return value gives the caller permission to use references to the
291 * pages, e.g., attach them to skbs. Additionally, if the value is <0 at least
292 * one of the pages is PF_MEMALLOC.
293 *
294 * Regardless of outcome the caller is granted a reference to each of the pages.
295 */
fun_gather_pkt(struct funeth_rxq * q,unsigned int tot_len,skb_frag_t * frags)296 static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len,
297 skb_frag_t *frags)
298 {
299 struct funeth_rxbuf *buf = q->cur_buf;
300 unsigned int frag_len;
301 int ref_ok = 1;
302
303 for (;;) {
304 buf = get_buf(q, buf, tot_len);
305
306 /* We always keep the RQ full of buffers so before we can give
307 * one of our pages to the stack we require that we can obtain
308 * a replacement page. If we can't the packet will either be
309 * copied or dropped so we can retain ownership of the page and
310 * reuse it.
311 */
312 if (!q->spare_buf.page &&
313 funeth_alloc_page(q, &q->spare_buf, numa_mem_id(),
314 GFP_ATOMIC | __GFP_MEMALLOC))
315 ref_ok = 0;
316
317 frag_len = min_t(unsigned int, tot_len,
318 PAGE_SIZE - q->buf_offset);
319 dma_sync_single_for_cpu(q->dma_dev,
320 buf->dma_addr + q->buf_offset,
321 frag_len, DMA_FROM_DEVICE);
322 buf->pg_refs--;
323 if (ref_ok)
324 ref_ok |= buf->node;
325
326 __skb_frag_set_page(frags, buf->page);
327 skb_frag_off_set(frags, q->buf_offset);
328 skb_frag_size_set(frags++, frag_len);
329
330 tot_len -= frag_len;
331 if (!tot_len)
332 break;
333
334 q->buf_offset = PAGE_SIZE;
335 }
336 q->buf_offset = ALIGN(q->buf_offset + frag_len, FUN_EPRQ_PKT_ALIGN);
337 q->cur_buf = buf;
338 return ref_ok;
339 }
340
rx_hwtstamp_enabled(const struct net_device * dev)341 static bool rx_hwtstamp_enabled(const struct net_device *dev)
342 {
343 const struct funeth_priv *d = netdev_priv(dev);
344
345 return d->hwtstamp_cfg.rx_filter == HWTSTAMP_FILTER_ALL;
346 }
347
348 /* Advance the CQ pointers and phase tag to the next CQE. */
advance_cq(struct funeth_rxq * q)349 static void advance_cq(struct funeth_rxq *q)
350 {
351 if (unlikely(q->cq_head == q->cq_mask)) {
352 q->cq_head = 0;
353 q->phase ^= 1;
354 q->next_cqe_info = cqe_to_info(q->cqes);
355 } else {
356 q->cq_head++;
357 q->next_cqe_info += FUNETH_CQE_SIZE;
358 }
359 prefetch(q->next_cqe_info);
360 }
361
362 /* Process the packet represented by the head CQE of @q. Gather the packet's
363 * fragments, run it through the optional XDP program, and if needed construct
364 * an skb and pass it to the stack.
365 */
fun_handle_cqe_pkt(struct funeth_rxq * q,struct funeth_txq * xdp_q)366 static void fun_handle_cqe_pkt(struct funeth_rxq *q, struct funeth_txq *xdp_q)
367 {
368 const struct fun_eth_cqe *rxreq = info_to_cqe(q->next_cqe_info);
369 unsigned int i, tot_len, pkt_len = be32_to_cpu(rxreq->pkt_len);
370 struct net_device *ndev = q->netdev;
371 skb_frag_t frags[RX_MAX_FRAGS];
372 struct skb_shared_info *si;
373 unsigned int headroom;
374 gro_result_t gro_res;
375 struct sk_buff *skb;
376 int ref_ok;
377 void *va;
378 u16 cv;
379
380 u64_stats_update_begin(&q->syncp);
381 q->stats.rx_pkts++;
382 q->stats.rx_bytes += pkt_len;
383 u64_stats_update_end(&q->syncp);
384
385 advance_cq(q);
386
387 /* account for head- and tail-room, present only for 1-buffer packets */
388 tot_len = pkt_len;
389 headroom = be16_to_cpu(rxreq->headroom);
390 if (likely(headroom))
391 tot_len += FUN_RX_TAILROOM + headroom;
392
393 ref_ok = fun_gather_pkt(q, tot_len, frags);
394 va = skb_frag_address(frags);
395 if (xdp_q && headroom == FUN_XDP_HEADROOM) {
396 va = fun_run_xdp(q, frags, va, ref_ok, xdp_q);
397 if (!va)
398 return;
399 headroom = 0; /* XDP_PASS trims it */
400 }
401 if (unlikely(!ref_ok))
402 goto no_mem;
403
404 if (likely(headroom)) {
405 /* headroom is either FUN_RX_HEADROOM or FUN_XDP_HEADROOM */
406 prefetch(va + headroom);
407 skb = napi_build_skb(va, ALIGN(tot_len, FUN_EPRQ_PKT_ALIGN));
408 if (unlikely(!skb))
409 goto no_mem;
410
411 skb_reserve(skb, headroom);
412 __skb_put(skb, pkt_len);
413 skb->protocol = eth_type_trans(skb, ndev);
414 } else {
415 prefetch(va);
416 skb = napi_get_frags(q->napi);
417 if (unlikely(!skb))
418 goto no_mem;
419
420 if (ref_ok < 0)
421 skb->pfmemalloc = 1;
422
423 si = skb_shinfo(skb);
424 si->nr_frags = rxreq->nsgl;
425 for (i = 0; i < si->nr_frags; i++)
426 si->frags[i] = frags[i];
427
428 skb->len = pkt_len;
429 skb->data_len = pkt_len;
430 skb->truesize += round_up(pkt_len, FUN_EPRQ_PKT_ALIGN);
431 }
432
433 skb_record_rx_queue(skb, q->qidx);
434 cv = be16_to_cpu(rxreq->pkt_cv);
435 if (likely((q->netdev->features & NETIF_F_RXHASH) && rxreq->hash))
436 skb_set_hash(skb, be32_to_cpu(rxreq->hash),
437 cqe_to_pkt_hash_type(cv));
438 if (likely((q->netdev->features & NETIF_F_RXCSUM) && rxreq->csum)) {
439 FUN_QSTAT_INC(q, rx_cso);
440 skb->ip_summed = CHECKSUM_UNNECESSARY;
441 skb->csum_level = be16_to_cpu(rxreq->csum) - 1;
442 }
443 if (unlikely(rx_hwtstamp_enabled(q->netdev)))
444 skb_hwtstamps(skb)->hwtstamp = be64_to_cpu(rxreq->timestamp);
445
446 trace_funeth_rx(q, rxreq->nsgl, pkt_len, skb->hash, cv);
447
448 gro_res = skb->data_len ? napi_gro_frags(q->napi) :
449 napi_gro_receive(q->napi, skb);
450 if (gro_res == GRO_MERGED || gro_res == GRO_MERGED_FREE)
451 FUN_QSTAT_INC(q, gro_merged);
452 else if (gro_res == GRO_HELD)
453 FUN_QSTAT_INC(q, gro_pkts);
454 return;
455
456 no_mem:
457 FUN_QSTAT_INC(q, rx_mem_drops);
458
459 /* Release the references we've been granted for the frag pages.
460 * We return the ref of the last frag and free the rest.
461 */
462 q->cur_buf->pg_refs++;
463 for (i = 0; i < rxreq->nsgl - 1; i++)
464 __free_page(skb_frag_page(frags + i));
465 }
466
467 /* Return 0 if the phase tag of the CQE at the CQ's head matches expectations
468 * indicating the CQE is new.
469 */
cqe_phase_mismatch(const struct fun_cqe_info * ci,u16 phase)470 static u16 cqe_phase_mismatch(const struct fun_cqe_info *ci, u16 phase)
471 {
472 u16 sf_p = be16_to_cpu(ci->sf_p);
473
474 return (sf_p & 1) ^ phase;
475 }
476
477 /* Walk through a CQ identifying and processing fresh CQEs up to the given
478 * budget. Return the remaining budget.
479 */
fun_process_cqes(struct funeth_rxq * q,int budget)480 static int fun_process_cqes(struct funeth_rxq *q, int budget)
481 {
482 struct funeth_priv *fp = netdev_priv(q->netdev);
483 struct funeth_txq **xdpqs, *xdp_q = NULL;
484
485 xdpqs = rcu_dereference_bh(fp->xdpqs);
486 if (xdpqs)
487 xdp_q = xdpqs[smp_processor_id()];
488
489 while (budget && !cqe_phase_mismatch(q->next_cqe_info, q->phase)) {
490 /* access other descriptor fields after the phase check */
491 dma_rmb();
492
493 fun_handle_cqe_pkt(q, xdp_q);
494 budget--;
495 }
496
497 if (unlikely(q->xdp_flush)) {
498 if (q->xdp_flush & FUN_XDP_FLUSH_TX)
499 fun_txq_wr_db(xdp_q);
500 if (q->xdp_flush & FUN_XDP_FLUSH_REDIR)
501 xdp_do_flush();
502 q->xdp_flush = 0;
503 }
504
505 return budget;
506 }
507
508 /* NAPI handler for Rx queues. Calls the CQE processing loop and writes RQ/CQ
509 * doorbells as needed.
510 */
fun_rxq_napi_poll(struct napi_struct * napi,int budget)511 int fun_rxq_napi_poll(struct napi_struct *napi, int budget)
512 {
513 struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
514 struct funeth_rxq *q = irq->rxq;
515 int work_done = budget - fun_process_cqes(q, budget);
516 u32 cq_db_val = q->cq_head;
517
518 if (unlikely(work_done >= budget))
519 FUN_QSTAT_INC(q, rx_budget);
520 else if (napi_complete_done(napi, work_done))
521 cq_db_val |= q->irq_db_val;
522
523 /* check whether to post new Rx buffers */
524 if (q->rq_cons - q->rq_cons_db >= q->rq_db_thres) {
525 u64_stats_update_begin(&q->syncp);
526 q->stats.rx_bufs += q->rq_cons - q->rq_cons_db;
527 u64_stats_update_end(&q->syncp);
528 q->rq_cons_db = q->rq_cons;
529 writel((q->rq_cons - 1) & q->rq_mask, q->rq_db);
530 }
531
532 writel(cq_db_val, q->cq_db);
533 return work_done;
534 }
535
536 /* Free the Rx buffers of an Rx queue. */
fun_rxq_free_bufs(struct funeth_rxq * q)537 static void fun_rxq_free_bufs(struct funeth_rxq *q)
538 {
539 struct funeth_rxbuf *b = q->bufs;
540 unsigned int i;
541
542 for (i = 0; i <= q->rq_mask; i++, b++)
543 funeth_free_page(q, b);
544
545 funeth_free_page(q, &q->spare_buf);
546 q->cur_buf = NULL;
547 }
548
549 /* Initially provision an Rx queue with Rx buffers. */
fun_rxq_alloc_bufs(struct funeth_rxq * q,int node)550 static int fun_rxq_alloc_bufs(struct funeth_rxq *q, int node)
551 {
552 struct funeth_rxbuf *b = q->bufs;
553 unsigned int i;
554
555 for (i = 0; i <= q->rq_mask; i++, b++) {
556 if (funeth_alloc_page(q, b, node, GFP_KERNEL)) {
557 fun_rxq_free_bufs(q);
558 return -ENOMEM;
559 }
560 q->rqes[i] = FUN_EPRQ_RQBUF_INIT(b->dma_addr);
561 }
562 q->cur_buf = q->bufs;
563 return 0;
564 }
565
566 /* Initialize a used-buffer cache of the given depth. */
fun_rxq_init_cache(struct funeth_rx_cache * c,unsigned int depth,int node)567 static int fun_rxq_init_cache(struct funeth_rx_cache *c, unsigned int depth,
568 int node)
569 {
570 c->mask = depth - 1;
571 c->bufs = kvzalloc_node(depth * sizeof(*c->bufs), GFP_KERNEL, node);
572 return c->bufs ? 0 : -ENOMEM;
573 }
574
575 /* Deallocate an Rx queue's used-buffer cache and its contents. */
fun_rxq_free_cache(struct funeth_rxq * q)576 static void fun_rxq_free_cache(struct funeth_rxq *q)
577 {
578 struct funeth_rxbuf *b = q->cache.bufs;
579 unsigned int i;
580
581 for (i = 0; i <= q->cache.mask; i++, b++)
582 funeth_free_page(q, b);
583
584 kvfree(q->cache.bufs);
585 q->cache.bufs = NULL;
586 }
587
fun_rxq_set_bpf(struct funeth_rxq * q,struct bpf_prog * prog)588 int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog)
589 {
590 struct funeth_priv *fp = netdev_priv(q->netdev);
591 struct fun_admin_epcq_req cmd;
592 u16 headroom;
593 int err;
594
595 headroom = prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;
596 if (headroom != q->headroom) {
597 cmd.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ,
598 sizeof(cmd));
599 cmd.u.modify =
600 FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(FUN_ADMIN_SUBOP_MODIFY,
601 0, q->hw_cqid, headroom);
602 err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.common, NULL, 0,
603 0);
604 if (err)
605 return err;
606 q->headroom = headroom;
607 }
608
609 WRITE_ONCE(q->xdp_prog, prog);
610 return 0;
611 }
612
613 /* Create an Rx queue, allocating the host memory it needs. */
fun_rxq_create_sw(struct net_device * dev,unsigned int qidx,unsigned int ncqe,unsigned int nrqe,struct fun_irq * irq)614 static struct funeth_rxq *fun_rxq_create_sw(struct net_device *dev,
615 unsigned int qidx,
616 unsigned int ncqe,
617 unsigned int nrqe,
618 struct fun_irq *irq)
619 {
620 struct funeth_priv *fp = netdev_priv(dev);
621 struct funeth_rxq *q;
622 int err = -ENOMEM;
623 int numa_node;
624
625 numa_node = fun_irq_node(irq);
626 q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
627 if (!q)
628 goto err;
629
630 q->qidx = qidx;
631 q->netdev = dev;
632 q->cq_mask = ncqe - 1;
633 q->rq_mask = nrqe - 1;
634 q->numa_node = numa_node;
635 q->rq_db_thres = nrqe / 4;
636 u64_stats_init(&q->syncp);
637 q->dma_dev = &fp->pdev->dev;
638
639 q->rqes = fun_alloc_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes),
640 sizeof(*q->bufs), false, numa_node,
641 &q->rq_dma_addr, (void **)&q->bufs, NULL);
642 if (!q->rqes)
643 goto free_q;
644
645 q->cqes = fun_alloc_ring_mem(q->dma_dev, ncqe, FUNETH_CQE_SIZE, 0,
646 false, numa_node, &q->cq_dma_addr, NULL,
647 NULL);
648 if (!q->cqes)
649 goto free_rqes;
650
651 err = fun_rxq_init_cache(&q->cache, nrqe, numa_node);
652 if (err)
653 goto free_cqes;
654
655 err = fun_rxq_alloc_bufs(q, numa_node);
656 if (err)
657 goto free_cache;
658
659 q->stats.rx_bufs = q->rq_mask;
660 q->init_state = FUN_QSTATE_INIT_SW;
661 return q;
662
663 free_cache:
664 fun_rxq_free_cache(q);
665 free_cqes:
666 dma_free_coherent(q->dma_dev, ncqe * FUNETH_CQE_SIZE, q->cqes,
667 q->cq_dma_addr);
668 free_rqes:
669 fun_free_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), false, q->rqes,
670 q->rq_dma_addr, q->bufs);
671 free_q:
672 kfree(q);
673 err:
674 netdev_err(dev, "Unable to allocate memory for Rx queue %u\n", qidx);
675 return ERR_PTR(err);
676 }
677
fun_rxq_free_sw(struct funeth_rxq * q)678 static void fun_rxq_free_sw(struct funeth_rxq *q)
679 {
680 struct funeth_priv *fp = netdev_priv(q->netdev);
681
682 fun_rxq_free_cache(q);
683 fun_rxq_free_bufs(q);
684 fun_free_ring_mem(q->dma_dev, q->rq_mask + 1, sizeof(*q->rqes), false,
685 q->rqes, q->rq_dma_addr, q->bufs);
686 dma_free_coherent(q->dma_dev, (q->cq_mask + 1) * FUNETH_CQE_SIZE,
687 q->cqes, q->cq_dma_addr);
688
689 /* Before freeing the queue transfer key counters to the device. */
690 fp->rx_packets += q->stats.rx_pkts;
691 fp->rx_bytes += q->stats.rx_bytes;
692 fp->rx_dropped += q->stats.rx_map_err + q->stats.rx_mem_drops;
693
694 kfree(q);
695 }
696
/* Create an Rx queue's resources on the device.
 *
 * Registers the queue's XDP Rx info, resets the queue's ring state, creates
 * the device RQ and CQ, attaches the queue to @irq, and posts the initial Rx
 * buffers. On success the queue is in state FUN_QSTATE_INIT_FULL.
 *
 * Return 0 on success or a negative error code, in which case everything set
 * up by this function has been undone and an error has been logged.
 */
int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq)
{
	struct funeth_priv *fp = netdev_priv(q->netdev);
	unsigned int ncqe = q->cq_mask + 1;
	unsigned int nrqe = q->rq_mask + 1;
	int err;

	err = xdp_rxq_info_reg(&q->xdp_rxq, q->netdev, q->qidx,
			       irq->napi.napi_id);
	if (err)
		goto out;

	err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_SHARED,
					 NULL);
	if (err)
		goto xdp_unreg;

	/* start from a clean ring state */
	q->phase = 1;
	q->irq_cnt = 0;
	q->cq_head = 0;
	q->rq_cons = 0;
	q->rq_cons_db = 0;
	q->buf_offset = 0;
	q->napi = &irq->napi;
	q->irq_db_val = fp->cq_irq_db;
	q->next_cqe_info = cqe_to_info(q->cqes);

	/* the headroom requested from the device depends on XDP being in use */
	q->xdp_prog = fp->xdp_prog;
	q->headroom = fp->xdp_prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;

	/* the RQ is created first, its id is passed to the CQ creation below */
	err = fun_sq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
			    FUN_ADMIN_EPSQ_CREATE_FLAG_RQ, 0,
			    FUN_HCI_ID_INVALID, 0, nrqe, q->rq_dma_addr, 0, 0,
			    0, 0, fp->fdev->kern_end_qid, PAGE_SHIFT,
			    &q->hw_sqid, &q->rq_db);
	if (err)
		goto xdp_unreg;

	err = fun_cq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
			    FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, 0,
			    q->hw_sqid, ilog2(FUNETH_CQE_SIZE), ncqe,
			    q->cq_dma_addr, q->headroom, FUN_RX_TAILROOM, 0, 0,
			    irq->irq_idx, 0, fp->fdev->kern_end_qid,
			    &q->hw_cqid, &q->cq_db);
	if (err)
		goto free_rq;

	irq->rxq = q;
	/* initial RQ doorbell write; the value written is the ring's mask */
	writel(q->rq_mask, q->rq_db);
	q->init_state = FUN_QSTATE_INIT_FULL;

	netif_info(fp, ifup, q->netdev,
		   "Rx queue %u, depth %u/%u, HW qid %u/%u, IRQ idx %u, node %d, headroom %u\n",
		   q->qidx, ncqe, nrqe, q->hw_cqid, q->hw_sqid, irq->irq_idx,
		   q->numa_node, q->headroom);
	return 0;

	/* unwind in the reverse order of creation */
free_rq:
	fun_destroy_sq(fp->fdev, q->hw_sqid);
xdp_unreg:
	xdp_rxq_info_unreg(&q->xdp_rxq);
out:
	netdev_err(q->netdev,
		   "Failed to create Rx queue %u on device, error %d\n",
		   q->qidx, err);
	return err;
}
765
fun_rxq_free_dev(struct funeth_rxq * q)766 static void fun_rxq_free_dev(struct funeth_rxq *q)
767 {
768 struct funeth_priv *fp = netdev_priv(q->netdev);
769 struct fun_irq *irq;
770
771 if (q->init_state < FUN_QSTATE_INIT_FULL)
772 return;
773
774 irq = container_of(q->napi, struct fun_irq, napi);
775 netif_info(fp, ifdown, q->netdev,
776 "Freeing Rx queue %u (id %u/%u), IRQ %u\n",
777 q->qidx, q->hw_cqid, q->hw_sqid, irq->irq_idx);
778
779 irq->rxq = NULL;
780 xdp_rxq_info_unreg(&q->xdp_rxq);
781 fun_destroy_sq(fp->fdev, q->hw_sqid);
782 fun_destroy_cq(fp->fdev, q->hw_cqid);
783 q->init_state = FUN_QSTATE_INIT_SW;
784 }
785
786 /* Create or advance an Rx queue, allocating all the host and device resources
787 * needed to reach the target state.
788 */
funeth_rxq_create(struct net_device * dev,unsigned int qidx,unsigned int ncqe,unsigned int nrqe,struct fun_irq * irq,int state,struct funeth_rxq ** qp)789 int funeth_rxq_create(struct net_device *dev, unsigned int qidx,
790 unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq,
791 int state, struct funeth_rxq **qp)
792 {
793 struct funeth_rxq *q = *qp;
794 int err;
795
796 if (!q) {
797 q = fun_rxq_create_sw(dev, qidx, ncqe, nrqe, irq);
798 if (IS_ERR(q))
799 return PTR_ERR(q);
800 }
801
802 if (q->init_state >= state)
803 goto out;
804
805 err = fun_rxq_create_dev(q, irq);
806 if (err) {
807 if (!*qp)
808 fun_rxq_free_sw(q);
809 return err;
810 }
811
812 out:
813 *qp = q;
814 return 0;
815 }
816
817 /* Free Rx queue resources until it reaches the target state. */
funeth_rxq_free(struct funeth_rxq * q,int state)818 struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state)
819 {
820 if (state < FUN_QSTATE_INIT_FULL)
821 fun_rxq_free_dev(q);
822
823 if (state == FUN_QSTATE_DESTROYED) {
824 fun_rxq_free_sw(q);
825 q = NULL;
826 }
827
828 return q;
829 }
830