1 /*
2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_vlan.h>
36 #include <linux/ip.h>
37 #include <linux/tcp.h>
38 #include <linux/dma-mapping.h>
39 #include <linux/slab.h>
40 #include <linux/prefetch.h>
41 #include <net/arp.h>
42 #include "common.h"
43 #include "regs.h"
44 #include "sge_defs.h"
45 #include "t3_cpl.h"
46 #include "firmware_exports.h"
47 #include "cxgb3_offload.h"
48
49 #define USE_GTS 0
50
51 #define SGE_RX_SM_BUF_SIZE 1536
52
53 #define SGE_RX_COPY_THRES 256
54 #define SGE_RX_PULL_LEN 128
55
56 #define SGE_PG_RSVD SMP_CACHE_BYTES
57 /*
58 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
59 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
60 * directly.
61 */
62 #define FL0_PG_CHUNK_SIZE 2048
63 #define FL0_PG_ORDER 0
64 #define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
65 #define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
66 #define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
67 #define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
68
69 #define SGE_RX_DROP_THRES 16
70 #define RX_RECLAIM_PERIOD (HZ/4)
71
72 /*
73 * Max number of Rx buffers we replenish at a time.
74 */
75 #define MAX_RX_REFILL 16U
76 /*
77 * Period of the Tx buffer reclaim timer. This timer does not need to run
78 * frequently as Tx buffers are usually reclaimed by new Tx packets.
79 */
80 #define TX_RECLAIM_PERIOD (HZ / 4)
81 #define TX_RECLAIM_TIMER_CHUNK 64U
82 #define TX_RECLAIM_CHUNK 16U
83
84 /* WR size in bytes */
85 #define WR_LEN (WR_FLITS * 8)
86
87 /*
88 * Types of Tx queues in each queue set. Order here matters, do not change.
89 */
90 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
91
92 /* Values for sge_txq.flags */
93 enum {
94 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
95 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
96 };
97
98 struct tx_desc {
99 __be64 flit[TX_DESC_FLITS];
100 };
101
102 struct rx_desc {
103 __be32 addr_lo;
104 __be32 len_gen;
105 __be32 gen2;
106 __be32 addr_hi;
107 };
108
109 struct tx_sw_desc { /* SW state per Tx descriptor */
110 struct sk_buff *skb;
111 u8 eop; /* set if last descriptor for packet */
112 u8 addr_idx; /* buffer index of first SGL entry in descriptor */
113 u8 fragidx; /* first page fragment associated with descriptor */
114 s8 sflit; /* start flit of first SGL entry in descriptor */
115 };
116
117 struct rx_sw_desc { /* SW state per Rx descriptor */
118 union {
119 struct sk_buff *skb;
120 struct fl_pg_chunk pg_chunk;
121 };
122 DEFINE_DMA_UNMAP_ADDR(dma_addr);
123 };
124
125 struct rsp_desc { /* response queue descriptor */
126 struct rss_header rss_hdr;
127 __be32 flags;
128 __be32 len_cq;
129 struct_group(immediate,
130 u8 imm_data[47];
131 u8 intr_gen;
132 );
133 };
134
135 /*
136 * Holds unmapping information for Tx packets that need deferred unmapping.
137 * This structure lives at skb->head and must be allocated by callers.
138 */
139 struct deferred_unmap_info {
140 struct pci_dev *pdev;
141 dma_addr_t addr[MAX_SKB_FRAGS + 1];
142 };
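
/*
 * The offload send path (see ofld_xmit() and restart_offloadq()) has map_skb()
 * deposit the DMA addresses at skb->head; setup_deferred_unmapping() then
 * records the addresses actually referenced by the SGL so that
 * deferred_unmap_destructor() can unmap them once the skb is finally freed.
 */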
143
144 /*
145 * Maps a number of flits to the number of Tx descriptors that can hold them.
146 * The formula is
147 *
148 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
149 *
150 * HW allows up to 4 descriptors to be combined into a WR.
151 */
152 static u8 flit_desc_map[] = {
153 0,
154 #if SGE_NUM_GENBITS == 1
155 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
156 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
157 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
158 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
159 #elif SGE_NUM_GENBITS == 2
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
162 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
163 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
164 #else
165 # error "SGE_NUM_GENBITS must be 1 or 2"
166 #endif
167 };
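
/*
 * Worked example (illustrative): with SGE_NUM_GENBITS == 2 the table above
 * maps 1-15 flits to a single descriptor, consistent with WR_FLITS == 15,
 * and desc = 1 + (flits - 2) / (WR_FLITS - 1) gives
 *
 *	flits = 15 -> 1 + 13 / 14 = 1 descriptor
 *	flits = 16 -> 1 + 14 / 14 = 2 descriptors
 *	flits = 30 -> 1 + 28 / 14 = 3 descriptors
 */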
168
169 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
170 {
171 return container_of(q, struct sge_qset, fl[qidx]);
172 }
173
174 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
175 {
176 return container_of(q, struct sge_qset, rspq);
177 }
178
179 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
180 {
181 return container_of(q, struct sge_qset, txq[qidx]);
182 }
183
184 /**
185 * refill_rspq - replenish an SGE response queue
186 * @adapter: the adapter
187 * @q: the response queue to replenish
188 * @credits: how many new responses to make available
189 *
190 * Replenishes a response queue by making the supplied number of responses
191 * available to HW.
192 */
193 static inline void refill_rspq(struct adapter *adapter,
194 const struct sge_rspq *q, unsigned int credits)
195 {
196 rmb();
197 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
198 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
199 }
200
201 /**
202 * need_skb_unmap - does the platform need unmapping of sk_buffs?
203 *
204 * Returns true if the platform needs sk_buff unmapping. Because this is a
205 * compile-time constant, the compiler optimizes away unneeded unmapping code.
206 */
207 static inline int need_skb_unmap(void)
208 {
209 #ifdef CONFIG_NEED_DMA_MAP_STATE
210 return 1;
211 #else
212 return 0;
213 #endif
214 }
215
216 /**
217 * unmap_skb - unmap a packet main body and its page fragments
218 * @skb: the packet
219 * @q: the Tx queue containing Tx descriptors for the packet
220 * @cidx: index of Tx descriptor
221 * @pdev: the PCI device
222 *
223 * Unmap the main body of an sk_buff and its page fragments, if any.
224 * Because of the fairly complicated structure of our SGLs and the desire
225 * to conserve space for metadata, the information necessary to unmap an
226 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
227 * descriptors (the physical addresses of the various data buffers), and
228 * the SW descriptor state (assorted indices). The send functions
229 * initialize the indices for the first packet descriptor so we can unmap
230 * the buffers held in the first Tx descriptor here, and we have enough
231 * information at this point to set the state for the next Tx descriptor.
232 *
233 * Note that it is possible to clean up the first descriptor of a packet
234 * before the send routines have written the next descriptors, but this
235 * race does not cause any problem. We just end up writing the unmapping
236 * info for the descriptor first.
237 */
238 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
239 unsigned int cidx, struct pci_dev *pdev)
240 {
241 const struct sg_ent *sgp;
242 struct tx_sw_desc *d = &q->sdesc[cidx];
243 int nfrags, frag_idx, curflit, j = d->addr_idx;
244
245 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
246 frag_idx = d->fragidx;
247
248 if (frag_idx == 0 && skb_headlen(skb)) {
249 dma_unmap_single(&pdev->dev, be64_to_cpu(sgp->addr[0]),
250 skb_headlen(skb), DMA_TO_DEVICE);
251 j = 1;
252 }
253
254 curflit = d->sflit + 1 + j;
255 nfrags = skb_shinfo(skb)->nr_frags;
256
257 while (frag_idx < nfrags && curflit < WR_FLITS) {
258 dma_unmap_page(&pdev->dev, be64_to_cpu(sgp->addr[j]),
259 skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]),
260 DMA_TO_DEVICE);
261 j ^= 1;
262 if (j == 0) {
263 sgp++;
264 curflit++;
265 }
266 curflit++;
267 frag_idx++;
268 }
269
270 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
271 d = cidx + 1 == q->size ? q->sdesc : d + 1;
272 d->fragidx = frag_idx;
273 d->addr_idx = j;
274 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
275 }
276 }
277
278 /**
279 * free_tx_desc - reclaims Tx descriptors and their buffers
280 * @adapter: the adapter
281 * @q: the Tx queue to reclaim descriptors from
282 * @n: the number of descriptors to reclaim
283 *
284 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
285 * Tx buffers. Called with the Tx queue lock held.
286 */
287 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
288 unsigned int n)
289 {
290 struct tx_sw_desc *d;
291 struct pci_dev *pdev = adapter->pdev;
292 unsigned int cidx = q->cidx;
293
294 const int need_unmap = need_skb_unmap() &&
295 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
296
297 d = &q->sdesc[cidx];
298 while (n--) {
299 if (d->skb) { /* an SGL is present */
300 if (need_unmap)
301 unmap_skb(d->skb, q, cidx, pdev);
302 if (d->eop) {
303 dev_consume_skb_any(d->skb);
304 d->skb = NULL;
305 }
306 }
307 ++d;
308 if (++cidx == q->size) {
309 cidx = 0;
310 d = q->sdesc;
311 }
312 }
313 q->cidx = cidx;
314 }
315
316 /**
317 * reclaim_completed_tx - reclaims completed Tx descriptors
318 * @adapter: the adapter
319 * @q: the Tx queue to reclaim completed descriptors from
320 * @chunk: maximum number of descriptors to reclaim
321 *
322 * Reclaims Tx descriptors that the SGE has indicated it has processed,
323 * and frees the associated buffers if possible. Called with the Tx
324 * queue's lock held.
325 */
326 static inline unsigned int reclaim_completed_tx(struct adapter *adapter,
327 struct sge_txq *q,
328 unsigned int chunk)
329 {
330 unsigned int reclaim = q->processed - q->cleaned;
331
332 reclaim = min(chunk, reclaim);
333 if (reclaim) {
334 free_tx_desc(adapter, q, reclaim);
335 q->cleaned += reclaim;
336 q->in_use -= reclaim;
337 }
338 return q->processed - q->cleaned;
339 }
340
341 /**
342 * should_restart_tx - are there enough resources to restart a Tx queue?
343 * @q: the Tx queue
344 *
345 * Checks if there are enough descriptors to restart a suspended Tx queue.
346 */
347 static inline int should_restart_tx(const struct sge_txq *q)
348 {
349 unsigned int r = q->processed - q->cleaned;
350
351 return q->in_use - r < (q->size >> 1);
352 }
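
/*
 * That is, a suspended queue is restarted only once more than half of its
 * descriptors would be free after reclaiming everything the SGE has already
 * processed.
 */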
353
354 static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
355 struct rx_sw_desc *d)
356 {
357 if (q->use_pages && d->pg_chunk.page) {
358 (*d->pg_chunk.p_cnt)--;
359 if (!*d->pg_chunk.p_cnt)
360 dma_unmap_page(&pdev->dev, d->pg_chunk.mapping,
361 q->alloc_size, DMA_FROM_DEVICE);
362
363 put_page(d->pg_chunk.page);
364 d->pg_chunk.page = NULL;
365 } else {
366 dma_unmap_single(&pdev->dev, dma_unmap_addr(d, dma_addr),
367 q->buf_size, DMA_FROM_DEVICE);
368 kfree_skb(d->skb);
369 d->skb = NULL;
370 }
371 }
372
373 /**
374 * free_rx_bufs - free the Rx buffers on an SGE free list
375 * @pdev: the PCI device associated with the adapter
376 * @q: the SGE free list to clean up
377 *
378 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
379 * this queue should be stopped before calling this function.
380 */
381 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
382 {
383 unsigned int cidx = q->cidx;
384
385 while (q->credits--) {
386 struct rx_sw_desc *d = &q->sdesc[cidx];
387
388
389 clear_rx_desc(pdev, q, d);
390 if (++cidx == q->size)
391 cidx = 0;
392 }
393
394 if (q->pg_chunk.page) {
395 __free_pages(q->pg_chunk.page, q->order);
396 q->pg_chunk.page = NULL;
397 }
398 }
399
400 /**
401 * add_one_rx_buf - add a packet buffer to a free-buffer list
402 * @va: buffer start VA
403 * @len: the buffer length
404 * @d: the HW Rx descriptor to write
405 * @sd: the SW Rx descriptor to write
406 * @gen: the generation bit value
407 * @pdev: the PCI device associated with the adapter
408 *
409 * Add a buffer of the given length to the supplied HW and SW Rx
410 * descriptors.
411 */
412 static inline int add_one_rx_buf(void *va, unsigned int len,
413 struct rx_desc *d, struct rx_sw_desc *sd,
414 unsigned int gen, struct pci_dev *pdev)
415 {
416 dma_addr_t mapping;
417
418 mapping = dma_map_single(&pdev->dev, va, len, DMA_FROM_DEVICE);
419 if (unlikely(dma_mapping_error(&pdev->dev, mapping)))
420 return -ENOMEM;
421
422 dma_unmap_addr_set(sd, dma_addr, mapping);
423
424 d->addr_lo = cpu_to_be32(mapping);
425 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
426 dma_wmb();
427 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
428 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
429 return 0;
430 }
431
432 static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
433 unsigned int gen)
434 {
435 d->addr_lo = cpu_to_be32(mapping);
436 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
437 dma_wmb();
438 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
439 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
440 return 0;
441 }
442
443 static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
444 struct rx_sw_desc *sd, gfp_t gfp,
445 unsigned int order)
446 {
447 if (!q->pg_chunk.page) {
448 dma_addr_t mapping;
449
450 q->pg_chunk.page = alloc_pages(gfp, order);
451 if (unlikely(!q->pg_chunk.page))
452 return -ENOMEM;
453 q->pg_chunk.va = page_address(q->pg_chunk.page);
454 q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
455 SGE_PG_RSVD;
456 q->pg_chunk.offset = 0;
457 mapping = dma_map_page(&adapter->pdev->dev, q->pg_chunk.page,
458 0, q->alloc_size, DMA_FROM_DEVICE);
459 if (unlikely(dma_mapping_error(&adapter->pdev->dev, mapping))) {
460 __free_pages(q->pg_chunk.page, order);
461 q->pg_chunk.page = NULL;
462 return -EIO;
463 }
464 q->pg_chunk.mapping = mapping;
465 }
466 sd->pg_chunk = q->pg_chunk;
467
468 prefetch(sd->pg_chunk.p_cnt);
469
470 q->pg_chunk.offset += q->buf_size;
471 if (q->pg_chunk.offset == (PAGE_SIZE << order))
472 q->pg_chunk.page = NULL;
473 else {
474 q->pg_chunk.va += q->buf_size;
475 get_page(q->pg_chunk.page);
476 }
477
478 if (sd->pg_chunk.offset == 0)
479 *sd->pg_chunk.p_cnt = 1;
480 else
481 *sd->pg_chunk.p_cnt += 1;
482
483 return 0;
484 }
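
/*
 * Page-chunk bookkeeping: the shared reference count *p_cnt lives in the last
 * SGE_PG_RSVD bytes of the page and counts the chunks handed out above;
 * clear_rx_desc() and get_packet_pg() decrement it and tear down the page's
 * DMA mapping only once the final chunk is released.
 */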
485
486 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
487 {
488 if (q->pend_cred >= q->credits / 4) {
489 q->pend_cred = 0;
490 wmb();
491 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
492 }
493 }
494
495 /**
496 * refill_fl - refill an SGE free-buffer list
497 * @adap: the adapter
498 * @q: the free-list to refill
499 * @n: the number of new buffers to allocate
500 * @gfp: the gfp flags for allocating new buffers
501 *
502 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
503 * allocated with the supplied gfp flags. The caller must ensure that
504 * @n does not exceed the queue's capacity.
505 */
506 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
507 {
508 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
509 struct rx_desc *d = &q->desc[q->pidx];
510 unsigned int count = 0;
511
512 while (n--) {
513 dma_addr_t mapping;
514 int err;
515
516 if (q->use_pages) {
517 if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
518 q->order))) {
519 nomem: q->alloc_failed++;
520 break;
521 }
522 mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset;
523 dma_unmap_addr_set(sd, dma_addr, mapping);
524
525 add_one_rx_chunk(mapping, d, q->gen);
526 dma_sync_single_for_device(&adap->pdev->dev, mapping,
527 q->buf_size - SGE_PG_RSVD,
528 DMA_FROM_DEVICE);
529 } else {
530 void *buf_start;
531
532 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
533 if (!skb)
534 goto nomem;
535
536 sd->skb = skb;
537 buf_start = skb->data;
538 err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
539 q->gen, adap->pdev);
540 if (unlikely(err)) {
541 clear_rx_desc(adap->pdev, q, sd);
542 break;
543 }
544 }
545
546 d++;
547 sd++;
548 if (++q->pidx == q->size) {
549 q->pidx = 0;
550 q->gen ^= 1;
551 sd = q->sdesc;
552 d = q->desc;
553 }
554 count++;
555 }
556
557 q->credits += count;
558 q->pend_cred += count;
559 ring_fl_db(adap, q);
560
561 return count;
562 }
563
564 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
565 {
566 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
567 GFP_ATOMIC | __GFP_COMP);
568 }
569
570 /**
571 * recycle_rx_buf - recycle a receive buffer
572 * @adap: the adapter
573 * @q: the SGE free list
574 * @idx: index of buffer to recycle
575 *
576 * Recycles the specified buffer on the given free list by adding it at
577 * the next available slot on the list.
578 */
579 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
580 unsigned int idx)
581 {
582 struct rx_desc *from = &q->desc[idx];
583 struct rx_desc *to = &q->desc[q->pidx];
584
585 q->sdesc[q->pidx] = q->sdesc[idx];
586 to->addr_lo = from->addr_lo; /* already big endian */
587 to->addr_hi = from->addr_hi; /* likewise */
588 dma_wmb();
589 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
590 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
591
592 if (++q->pidx == q->size) {
593 q->pidx = 0;
594 q->gen ^= 1;
595 }
596
597 q->credits++;
598 q->pend_cred++;
599 ring_fl_db(adap, q);
600 }
601
602 /**
603 * alloc_ring - allocate resources for an SGE descriptor ring
604 * @pdev: the PCI device
605 * @nelem: the number of descriptors
606 * @elem_size: the size of each descriptor
607 * @sw_size: the size of the SW state associated with each ring element
608 * @phys: the physical address of the allocated ring
609 * @metadata: address of the array holding the SW state for the ring
610 *
611 * Allocates resources for an SGE descriptor ring, such as Tx queues,
612 * free buffer lists, or response queues. Each SGE ring requires
613 * space for its HW descriptors plus, optionally, space for the SW state
614 * associated with each HW entry (the metadata). The function returns
615 * three values: the virtual address for the HW ring (the return value
616 * of the function), the physical address of the HW ring, and the address
617 * of the SW ring.
618 */
619 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
620 size_t sw_size, dma_addr_t * phys, void *metadata)
621 {
622 size_t len = nelem * elem_size;
623 void *s = NULL;
624 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
625
626 if (!p)
627 return NULL;
628 if (sw_size && metadata) {
629 s = kcalloc(nelem, sw_size, GFP_KERNEL);
630
631 if (!s) {
632 dma_free_coherent(&pdev->dev, len, p, *phys);
633 return NULL;
634 }
635 *(void **)metadata = s;
636 }
637 return p;
638 }
639
640 /**
641 * t3_reset_qset - reset a sge qset
642 * @q: the queue set
643 *
644 * Reset the qset structure.
645 * The NAPI structure is preserved in the event of
646 * the qset's reincarnation, for example during EEH recovery.
647 */
648 static void t3_reset_qset(struct sge_qset *q)
649 {
650 if (q->adap &&
651 !(q->adap->flags & NAPI_INIT)) {
652 memset(q, 0, sizeof(*q));
653 return;
654 }
655
656 q->adap = NULL;
657 memset(&q->rspq, 0, sizeof(q->rspq));
658 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
659 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
660 q->txq_stopped = 0;
661 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
662 q->rx_reclaim_timer.function = NULL;
663 q->nomem = 0;
664 napi_free_frags(&q->napi);
665 }
666
667
668 /**
669 * t3_free_qset - free the resources of an SGE queue set
670 * @adapter: the adapter owning the queue set
671 * @q: the queue set
672 *
673 * Release the HW and SW resources associated with an SGE queue set, such
674 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
675 * queue set must be quiesced prior to calling this.
676 */
677 static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
678 {
679 int i;
680 struct pci_dev *pdev = adapter->pdev;
681
682 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
683 if (q->fl[i].desc) {
684 spin_lock_irq(&adapter->sge.reg_lock);
685 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
686 spin_unlock_irq(&adapter->sge.reg_lock);
687 free_rx_bufs(pdev, &q->fl[i]);
688 kfree(q->fl[i].sdesc);
689 dma_free_coherent(&pdev->dev,
690 q->fl[i].size *
691 sizeof(struct rx_desc), q->fl[i].desc,
692 q->fl[i].phys_addr);
693 }
694
695 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
696 if (q->txq[i].desc) {
697 spin_lock_irq(&adapter->sge.reg_lock);
698 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
699 spin_unlock_irq(&adapter->sge.reg_lock);
700 if (q->txq[i].sdesc) {
701 free_tx_desc(adapter, &q->txq[i],
702 q->txq[i].in_use);
703 kfree(q->txq[i].sdesc);
704 }
705 dma_free_coherent(&pdev->dev,
706 q->txq[i].size *
707 sizeof(struct tx_desc),
708 q->txq[i].desc, q->txq[i].phys_addr);
709 __skb_queue_purge(&q->txq[i].sendq);
710 }
711
712 if (q->rspq.desc) {
713 spin_lock_irq(&adapter->sge.reg_lock);
714 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
715 spin_unlock_irq(&adapter->sge.reg_lock);
716 dma_free_coherent(&pdev->dev,
717 q->rspq.size * sizeof(struct rsp_desc),
718 q->rspq.desc, q->rspq.phys_addr);
719 }
720
721 t3_reset_qset(q);
722 }
723
724 /**
725 * init_qset_cntxt - initialize an SGE queue set context info
726 * @qs: the queue set
727 * @id: the queue set id
728 *
729 * Initializes the TIDs and context ids for the queues of a queue set.
730 */
731 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
732 {
733 qs->rspq.cntxt_id = id;
734 qs->fl[0].cntxt_id = 2 * id;
735 qs->fl[1].cntxt_id = 2 * id + 1;
736 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
737 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
738 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
739 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
740 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
741 }
742
743 /**
744 * sgl_len - calculates the size of an SGL of the given capacity
745 * @n: the number of SGL entries
746 *
747 * Calculates the number of flits needed for a scatter/gather list that
748 * can hold the given number of entries.
749 */
750 static inline unsigned int sgl_len(unsigned int n)
751 {
752 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
753 return (3 * n) / 2 + (n & 1);
754 }
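
/*
 * Illustration: each struct sg_ent packs two (length, address) pairs into
 * three flits (2 x 4-byte lengths + 2 x 8-byte addresses), and a trailing odd
 * entry still occupies two flits, hence 3 * n / 2 + (n & 1):
 *
 *	n = 1 -> 2 flits, n = 2 -> 3 flits, n = 3 -> 5 flits, n = 4 -> 6 flits
 */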
755
756 /**
757 * flits_to_desc - returns the num of Tx descriptors for the given flits
758 * @n: the number of flits
759 *
760 * Calculates the number of Tx descriptors needed for the supplied number
761 * of flits.
762 */
763 static inline unsigned int flits_to_desc(unsigned int n)
764 {
765 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
766 return flit_desc_map[n];
767 }
768
769 /**
770 * get_packet - return the next ingress packet buffer from a free list
771 * @adap: the adapter that received the packet
772 * @fl: the SGE free list holding the packet
773 * @len: the packet length including any SGE padding
774 * @drop_thres: # of remaining buffers before we start dropping packets
775 *
776 * Get the next packet from a free list and complete setup of the
777 * sk_buff. If the packet is small we make a copy and recycle the
778 * original buffer, otherwise we use the original buffer itself. If a
779 * positive drop threshold is supplied packets are dropped and their
780 * buffers recycled if (a) the number of remaining buffers is under the
781 * threshold and the packet is too big to copy, or (b) the packet should
782 * be copied but there is no memory for the copy.
783 */
784 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
785 unsigned int len, unsigned int drop_thres)
786 {
787 struct sk_buff *skb = NULL;
788 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
789
790 prefetch(sd->skb->data);
791 fl->credits--;
792
793 if (len <= SGE_RX_COPY_THRES) {
794 skb = alloc_skb(len, GFP_ATOMIC);
795 if (likely(skb != NULL)) {
796 __skb_put(skb, len);
797 dma_sync_single_for_cpu(&adap->pdev->dev,
798 dma_unmap_addr(sd, dma_addr),
799 len, DMA_FROM_DEVICE);
800 memcpy(skb->data, sd->skb->data, len);
801 dma_sync_single_for_device(&adap->pdev->dev,
802 dma_unmap_addr(sd, dma_addr),
803 len, DMA_FROM_DEVICE);
804 } else if (!drop_thres)
805 goto use_orig_buf;
806 recycle:
807 recycle_rx_buf(adap, fl, fl->cidx);
808 return skb;
809 }
810
811 if (unlikely(fl->credits < drop_thres) &&
812 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
813 GFP_ATOMIC | __GFP_COMP) == 0)
814 goto recycle;
815
816 use_orig_buf:
817 dma_unmap_single(&adap->pdev->dev, dma_unmap_addr(sd, dma_addr),
818 fl->buf_size, DMA_FROM_DEVICE);
819 skb = sd->skb;
820 skb_put(skb, len);
821 __refill_fl(adap, fl);
822 return skb;
823 }
824
825 /**
826 * get_packet_pg - return the next ingress packet buffer from a free list
827 * @adap: the adapter that received the packet
828 * @fl: the SGE free list holding the packet
829 * @q: the queue
830 * @len: the packet length including any SGE padding
831 * @drop_thres: # of remaining buffers before we start dropping packets
832 *
833 * Get the next packet from a free list populated with page chunks.
834 * If the packet is small we make a copy and recycle the original buffer,
835 * otherwise we attach the original buffer as a page fragment to a fresh
836 * sk_buff. If a positive drop threshold is supplied packets are dropped
837 * and their buffers recycled if (a) the number of remaining buffers is
838 * under the threshold and the packet is too big to copy, or (b) there's
839 * no system memory.
840 *
841 * Note: this function is similar to @get_packet but deals with Rx buffers
842 * that are page chunks rather than sk_buffs.
843 */
844 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
845 struct sge_rspq *q, unsigned int len,
846 unsigned int drop_thres)
847 {
848 struct sk_buff *newskb, *skb;
849 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
850
851 dma_addr_t dma_addr = dma_unmap_addr(sd, dma_addr);
852
853 newskb = skb = q->pg_skb;
854 if (!skb && (len <= SGE_RX_COPY_THRES)) {
855 newskb = alloc_skb(len, GFP_ATOMIC);
856 if (likely(newskb != NULL)) {
857 __skb_put(newskb, len);
858 dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr,
859 len, DMA_FROM_DEVICE);
860 memcpy(newskb->data, sd->pg_chunk.va, len);
861 dma_sync_single_for_device(&adap->pdev->dev, dma_addr,
862 len, DMA_FROM_DEVICE);
863 } else if (!drop_thres)
864 return NULL;
865 recycle:
866 fl->credits--;
867 recycle_rx_buf(adap, fl, fl->cidx);
868 q->rx_recycle_buf++;
869 return newskb;
870 }
871
872 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
873 goto recycle;
874
875 prefetch(sd->pg_chunk.p_cnt);
876
877 if (!skb)
878 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
879
880 if (unlikely(!newskb)) {
881 if (!drop_thres)
882 return NULL;
883 goto recycle;
884 }
885
886 dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr, len,
887 DMA_FROM_DEVICE);
888 (*sd->pg_chunk.p_cnt)--;
889 if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
890 dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping,
891 fl->alloc_size, DMA_FROM_DEVICE);
892 if (!skb) {
893 __skb_put(newskb, SGE_RX_PULL_LEN);
894 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
895 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
896 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
897 len - SGE_RX_PULL_LEN);
898 newskb->len = len;
899 newskb->data_len = len - SGE_RX_PULL_LEN;
900 newskb->truesize += newskb->data_len;
901 } else {
902 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
903 sd->pg_chunk.page,
904 sd->pg_chunk.offset, len);
905 newskb->len += len;
906 newskb->data_len += len;
907 newskb->truesize += len;
908 }
909
910 fl->credits--;
911 /*
912 * We do not refill FLs here, we let the caller do it to overlap a
913 * prefetch.
914 */
915 return newskb;
916 }
917
918 /**
919 * get_imm_packet - return the next ingress packet buffer from a response
920 * @resp: the response descriptor containing the packet data
921 *
922 * Return a packet containing the immediate data of the given response.
923 */
924 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
925 {
926 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
927
928 if (skb) {
929 __skb_put(skb, IMMED_PKT_SIZE);
930 BUILD_BUG_ON(IMMED_PKT_SIZE != sizeof(resp->immediate));
931 skb_copy_to_linear_data(skb, &resp->immediate, IMMED_PKT_SIZE);
932 }
933 return skb;
934 }
935
936 /**
937 * calc_tx_descs - calculate the number of Tx descriptors for a packet
938 * @skb: the packet
939 *
940 * Returns the number of Tx descriptors needed for the given Ethernet
941 * packet. Ethernet packets require addition of WR and CPL headers.
942 */
943 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
944 {
945 unsigned int flits;
946
947 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
948 return 1;
949
950 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
951 if (skb_shinfo(skb)->gso_size)
952 flits++;
953 return flits_to_desc(flits);
954 }
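
/*
 * Illustration: a packet too long for immediate data, with a linear head and
 * two page fragments, needs sgl_len(3) + 2 = 7 flits (8 with TSO), which
 * flits_to_desc() maps to a single Tx descriptor.
 */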
955
956 /* map_skb - map a packet main body and its page fragments
957 * @pdev: the PCI device
958 * @skb: the packet
959 * @addr: placeholder to save the mapped addresses
960 *
961 * Map the main body of an sk_buff and its page fragments, if any.
962 */
963 static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb,
964 dma_addr_t *addr)
965 {
966 const skb_frag_t *fp, *end;
967 const struct skb_shared_info *si;
968
969 if (skb_headlen(skb)) {
970 *addr = dma_map_single(&pdev->dev, skb->data,
971 skb_headlen(skb), DMA_TO_DEVICE);
972 if (dma_mapping_error(&pdev->dev, *addr))
973 goto out_err;
974 addr++;
975 }
976
977 si = skb_shinfo(skb);
978 end = &si->frags[si->nr_frags];
979
980 for (fp = si->frags; fp < end; fp++) {
981 *addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp),
982 DMA_TO_DEVICE);
983 if (dma_mapping_error(&pdev->dev, *addr))
984 goto unwind;
985 addr++;
986 }
987 return 0;
988
989 unwind:
990 while (fp-- > si->frags)
991 dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp),
992 DMA_TO_DEVICE);
993
994 dma_unmap_single(&pdev->dev, addr[-1], skb_headlen(skb),
995 DMA_TO_DEVICE);
996 out_err:
997 return -ENOMEM;
998 }
999
1000 /**
1001 * write_sgl - populate a scatter/gather list for a packet
1002 * @skb: the packet
1003 * @sgp: the SGL to populate
1004 * @start: start address of skb main body data to include in the SGL
1005 * @len: length of skb main body data to include in the SGL
1006 * @addr: the list of the mapped addresses
1007 *
1008 * Copies the scatter/gather list for the buffers that make up a packet
1009 * and returns the SGL size in 8-byte words. The caller must size the SGL
1010 * appropriately.
1011 */
1012 static inline unsigned int write_sgl(const struct sk_buff *skb,
1013 struct sg_ent *sgp, unsigned char *start,
1014 unsigned int len, const dma_addr_t *addr)
1015 {
1016 unsigned int i, j = 0, k = 0, nfrags;
1017
1018 if (len) {
1019 sgp->len[0] = cpu_to_be32(len);
1020 sgp->addr[j++] = cpu_to_be64(addr[k++]);
1021 }
1022
1023 nfrags = skb_shinfo(skb)->nr_frags;
1024 for (i = 0; i < nfrags; i++) {
1025 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1026
1027 sgp->len[j] = cpu_to_be32(skb_frag_size(frag));
1028 sgp->addr[j] = cpu_to_be64(addr[k++]);
1029 j ^= 1;
1030 if (j == 0)
1031 ++sgp;
1032 }
1033 if (j)
1034 sgp->len[j] = 0;
1035 return ((nfrags + (len != 0)) * 3) / 2 + j;
1036 }
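
/*
 * Illustration: header data plus three page fragments yields four SGL entries
 * packed into exactly two sg_ents, so the function returns
 * (4 * 3) / 2 + 0 = 6 flits, in agreement with sgl_len(4).
 */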
1037
1038 /**
1039 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1040 * @adap: the adapter
1041 * @q: the Tx queue
1042 *
1043 * Ring the doorbell if a Tx queue is asleep. There is a natural race
1044 * where the HW may go to sleep just after we check; in that case the
1045 * interrupt handler will detect the outstanding TX packet and ring the
1046 * doorbell for us.
1047 *
1048 * When GTS is disabled we unconditionally ring the doorbell.
1049 */
1050 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
1051 {
1052 #if USE_GTS
1053 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1054 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1055 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1056 t3_write_reg(adap, A_SG_KDOORBELL,
1057 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1058 }
1059 #else
1060 wmb(); /* write descriptors before telling HW */
1061 t3_write_reg(adap, A_SG_KDOORBELL,
1062 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1063 #endif
1064 }
1065
1066 static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
1067 {
1068 #if SGE_NUM_GENBITS == 2
1069 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
1070 #endif
1071 }
1072
1073 /**
1074 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1075 * @ndesc: number of Tx descriptors spanned by the SGL
1076 * @skb: the packet corresponding to the WR
1077 * @d: first Tx descriptor to be written
1078 * @pidx: index of above descriptors
1079 * @q: the SGE Tx queue
1080 * @sgl: the SGL
1081 * @flits: number of flits to the start of the SGL in the first descriptor
1082 * @sgl_flits: the SGL size in flits
1083 * @gen: the Tx descriptor generation
1084 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1085 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1086 *
1087 * Write a work request header and an associated SGL. If the SGL is
1088 * small enough to fit into one Tx descriptor it has already been written
1089 * and we just need to write the WR header. Otherwise we distribute the
1090 * SGL across the number of descriptors it spans.
1091 */
1092 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
1093 struct tx_desc *d, unsigned int pidx,
1094 const struct sge_txq *q,
1095 const struct sg_ent *sgl,
1096 unsigned int flits, unsigned int sgl_flits,
1097 unsigned int gen, __be32 wr_hi,
1098 __be32 wr_lo)
1099 {
1100 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
1101 struct tx_sw_desc *sd = &q->sdesc[pidx];
1102
1103 sd->skb = skb;
1104 if (need_skb_unmap()) {
1105 sd->fragidx = 0;
1106 sd->addr_idx = 0;
1107 sd->sflit = flits;
1108 }
1109
1110 if (likely(ndesc == 1)) {
1111 sd->eop = 1;
1112 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1113 V_WR_SGLSFLT(flits)) | wr_hi;
1114 dma_wmb();
1115 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1116 V_WR_GEN(gen)) | wr_lo;
1117 wr_gen2(d, gen);
1118 } else {
1119 unsigned int ogen = gen;
1120 const u64 *fp = (const u64 *)sgl;
1121 struct work_request_hdr *wp = wrp;
1122
1123 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1124 V_WR_SGLSFLT(flits)) | wr_hi;
1125
1126 while (sgl_flits) {
1127 unsigned int avail = WR_FLITS - flits;
1128
1129 if (avail > sgl_flits)
1130 avail = sgl_flits;
1131 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1132 sgl_flits -= avail;
1133 ndesc--;
1134 if (!sgl_flits)
1135 break;
1136
1137 fp += avail;
1138 d++;
1139 sd->eop = 0;
1140 sd++;
1141 if (++pidx == q->size) {
1142 pidx = 0;
1143 gen ^= 1;
1144 d = q->desc;
1145 sd = q->sdesc;
1146 }
1147
1148 sd->skb = skb;
1149 wrp = (struct work_request_hdr *)d;
1150 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1151 V_WR_SGLSFLT(1)) | wr_hi;
1152 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1153 sgl_flits + 1)) |
1154 V_WR_GEN(gen)) | wr_lo;
1155 wr_gen2(d, gen);
1156 flits = 1;
1157 }
1158 sd->eop = 1;
1159 wrp->wr_hi |= htonl(F_WR_EOP);
1160 dma_wmb();
1161 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1162 wr_gen2((struct tx_desc *)wp, ogen);
1163 WARN_ON(ndesc != 0);
1164 }
1165 }
1166
1167 /**
1168 * write_tx_pkt_wr - write a TX_PKT work request
1169 * @adap: the adapter
1170 * @skb: the packet to send
1171 * @pi: the egress interface
1172 * @pidx: index of the first Tx descriptor to write
1173 * @gen: the generation value to use
1174 * @q: the Tx queue
1175 * @ndesc: number of descriptors the packet will occupy
1176 * @compl: the value of the COMPL bit to use
1177 * @addr: the DMA addresses of the packet body and fragments
1178 *
1179 * Generate a TX_PKT work request to send the supplied packet.
1180 */
1181 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1182 const struct port_info *pi,
1183 unsigned int pidx, unsigned int gen,
1184 struct sge_txq *q, unsigned int ndesc,
1185 unsigned int compl, const dma_addr_t *addr)
1186 {
1187 unsigned int flits, sgl_flits, cntrl, tso_info;
1188 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1189 struct tx_desc *d = &q->desc[pidx];
1190 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1191
1192 cpl->len = htonl(skb->len);
1193 cntrl = V_TXPKT_INTF(pi->port_id);
1194
1195 if (skb_vlan_tag_present(skb))
1196 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(skb_vlan_tag_get(skb));
1197
1198 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1199 if (tso_info) {
1200 int eth_type;
1201 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1202
1203 d->flit[2] = 0;
1204 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1205 hdr->cntrl = htonl(cntrl);
1206 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1207 CPL_ETH_II : CPL_ETH_II_VLAN;
1208 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1209 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1210 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1211 hdr->lso_info = htonl(tso_info);
1212 flits = 3;
1213 } else {
1214 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1215 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1216 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1217 cpl->cntrl = htonl(cntrl);
1218
1219 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1220 q->sdesc[pidx].skb = NULL;
1221 if (!skb->data_len)
1222 skb_copy_from_linear_data(skb, &d->flit[2],
1223 skb->len);
1224 else
1225 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1226
1227 flits = (skb->len + 7) / 8 + 2;
1228 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1229 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1230 | F_WR_SOP | F_WR_EOP | compl);
1231 dma_wmb();
1232 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1233 V_WR_TID(q->token));
1234 wr_gen2(d, gen);
1235 dev_consume_skb_any(skb);
1236 return;
1237 }
1238
1239 flits = 2;
1240 }
1241
1242 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1243 sgl_flits = write_sgl(skb, sgp, skb->data, skb_headlen(skb), addr);
1244
1245 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1246 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1247 htonl(V_WR_TID(q->token)));
1248 }
1249
1250 static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1251 struct sge_qset *qs, struct sge_txq *q)
1252 {
1253 netif_tx_stop_queue(txq);
1254 set_bit(TXQ_ETH, &qs->txq_stopped);
1255 q->stops++;
1256 }
1257
1258 /**
1259 * t3_eth_xmit - add a packet to the Ethernet Tx queue
1260 * @skb: the packet
1261 * @dev: the egress net device
1262 *
1263 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1264 */
1265 netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1266 {
1267 int qidx;
1268 unsigned int ndesc, pidx, credits, gen, compl;
1269 const struct port_info *pi = netdev_priv(dev);
1270 struct adapter *adap = pi->adapter;
1271 struct netdev_queue *txq;
1272 struct sge_qset *qs;
1273 struct sge_txq *q;
1274 dma_addr_t addr[MAX_SKB_FRAGS + 1];
1275
1276 /*
1277 * The chip min packet length is 9 octets but play safe and reject
1278 * anything shorter than an Ethernet header.
1279 */
1280 if (unlikely(skb->len < ETH_HLEN)) {
1281 dev_kfree_skb_any(skb);
1282 return NETDEV_TX_OK;
1283 }
1284
1285 qidx = skb_get_queue_mapping(skb);
1286 qs = &pi->qs[qidx];
1287 q = &qs->txq[TXQ_ETH];
1288 txq = netdev_get_tx_queue(dev, qidx);
1289
1290 reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1291
1292 credits = q->size - q->in_use;
1293 ndesc = calc_tx_descs(skb);
1294
1295 if (unlikely(credits < ndesc)) {
1296 t3_stop_tx_queue(txq, qs, q);
1297 dev_err(&adap->pdev->dev,
1298 "%s: Tx ring %u full while queue awake!\n",
1299 dev->name, q->cntxt_id & 7);
1300 return NETDEV_TX_BUSY;
1301 }
1302
1303 /* Map the Ethernet packet unless it is small enough to be sent as immediate data */
1304 if (skb->len > (WR_LEN - sizeof(struct cpl_tx_pkt))) {
1305 if (unlikely(map_skb(adap->pdev, skb, addr) < 0)) {
1306 dev_kfree_skb(skb);
1307 return NETDEV_TX_OK;
1308 }
1309 }
1310
1311 q->in_use += ndesc;
1312 if (unlikely(credits - ndesc < q->stop_thres)) {
1313 t3_stop_tx_queue(txq, qs, q);
1314
1315 if (should_restart_tx(q) &&
1316 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1317 q->restarts++;
1318 netif_tx_start_queue(txq);
1319 }
1320 }
1321
1322 gen = q->gen;
1323 q->unacked += ndesc;
1324 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1325 q->unacked &= 7;
1326 pidx = q->pidx;
1327 q->pidx += ndesc;
1328 if (q->pidx >= q->size) {
1329 q->pidx -= q->size;
1330 q->gen ^= 1;
1331 }
1332
1333 /* update port statistics */
1334 if (skb->ip_summed == CHECKSUM_PARTIAL)
1335 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1336 if (skb_shinfo(skb)->gso_size)
1337 qs->port_stats[SGE_PSTAT_TSO]++;
1338 if (skb_vlan_tag_present(skb))
1339 qs->port_stats[SGE_PSTAT_VLANINS]++;
1340
1341 /*
1342 * We do not use Tx completion interrupts to free DMAd Tx packets.
1343 * This is good for performance but means that we rely on new Tx
1344 * packets arriving to run the destructors of completed packets,
1345 * which open up space in their sockets' send queues. Sometimes
1346 * we do not get such new packets causing Tx to stall. A single
1347 * UDP transmitter is a good example of this situation. We have
1348 * a clean up timer that periodically reclaims completed packets
1349 * but it doesn't run often enough (nor do we want it to) to prevent
1350 * lengthy stalls. A solution to this problem is to run the
1351 * destructor early, after the packet is queued but before it's DMAd.
1352 * A downside is that we lie to socket memory accounting, but the amount
1353 * of extra memory is reasonable (limited by the number of Tx
1354 * descriptors), the packets do actually get freed quickly by new
1355 * packets almost always, and for protocols like TCP that wait for
1356 * acks to really free up the data the extra memory is even less.
1357 * On the positive side we run the destructors on the sending CPU
1358 * rather than on a potentially different completing CPU, usually a
1359 * good thing. We also run them without holding our Tx queue lock,
1360 * unlike what reclaim_completed_tx() would otherwise do.
1361 *
1362 * Run the destructor before telling the DMA engine about the packet
1363 * to make sure it doesn't complete and get freed prematurely.
1364 */
1365 if (likely(!skb_shared(skb)))
1366 skb_orphan(skb);
1367
1368 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl, addr);
1369 check_ring_tx_db(adap, q);
1370 return NETDEV_TX_OK;
1371 }
1372
1373 /**
1374 * write_imm - write a packet into a Tx descriptor as immediate data
1375 * @d: the Tx descriptor to write
1376 * @skb: the packet
1377 * @len: the length of packet data to write as immediate data
1378 * @gen: the generation bit value to write
1379 *
1380 * Writes a packet as immediate data into a Tx descriptor. The packet
1381 * contains a work request at its beginning. We must write the packet
1382 * carefully so the SGE doesn't read it accidentally before it's written
1383 * in its entirety.
1384 */
1385 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1386 unsigned int len, unsigned int gen)
1387 {
1388 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1389 struct work_request_hdr *to = (struct work_request_hdr *)d;
1390
1391 if (likely(!skb->data_len))
1392 memcpy(&to[1], &from[1], len - sizeof(*from));
1393 else
1394 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1395
1396 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1397 V_WR_BCNTLFLT(len & 7));
1398 dma_wmb();
1399 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1400 V_WR_LEN((len + 7) / 8));
1401 wr_gen2(d, gen);
1402 kfree_skb(skb);
1403 }
1404
1405 /**
1406 * check_desc_avail - check descriptor availability on a send queue
1407 * @adap: the adapter
1408 * @q: the send queue
1409 * @skb: the packet needing the descriptors
1410 * @ndesc: the number of Tx descriptors needed
1411 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1412 *
1413 * Checks if the requested number of Tx descriptors is available on an
1414 * SGE send queue. If the queue is already suspended or not enough
1415 * descriptors are available the packet is queued for later transmission.
1416 * Must be called with the Tx queue locked.
1417 *
1418 * Returns 0 if enough descriptors are available, 1 if there aren't
1419 * enough descriptors and the packet has been queued, and 2 if the caller
1420 * needs to retry because there weren't enough descriptors at the
1421 * beginning of the call but some freed up in the meantime.
1422 */
1423 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1424 struct sk_buff *skb, unsigned int ndesc,
1425 unsigned int qid)
1426 {
1427 if (unlikely(!skb_queue_empty(&q->sendq))) {
1428 addq_exit:__skb_queue_tail(&q->sendq, skb);
1429 return 1;
1430 }
1431 if (unlikely(q->size - q->in_use < ndesc)) {
1432 struct sge_qset *qs = txq_to_qset(q, qid);
1433
1434 set_bit(qid, &qs->txq_stopped);
1435 smp_mb__after_atomic();
1436
1437 if (should_restart_tx(q) &&
1438 test_and_clear_bit(qid, &qs->txq_stopped))
1439 return 2;
1440
1441 q->stops++;
1442 goto addq_exit;
1443 }
1444 return 0;
1445 }
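
/*
 * Callers (ctrl_xmit() and ofld_xmit()) retry after a return of 2, having
 * reclaimed completed descriptors, and back off after a return of 1, leaving
 * the queued skb for the restart_ctrlq()/restart_offloadq() work to transmit
 * once descriptors free up.
 */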
1446
1447 /**
1448 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1449 * @q: the SGE control Tx queue
1450 *
1451 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1452 * that send only immediate data (presently just the control queues) and
1453 * thus do not have any sk_buffs to release.
1454 */
1455 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1456 {
1457 unsigned int reclaim = q->processed - q->cleaned;
1458
1459 q->in_use -= reclaim;
1460 q->cleaned += reclaim;
1461 }
1462
1463 static inline int immediate(const struct sk_buff *skb)
1464 {
1465 return skb->len <= WR_LEN;
1466 }
1467
1468 /**
1469 * ctrl_xmit - send a packet through an SGE control Tx queue
1470 * @adap: the adapter
1471 * @q: the control queue
1472 * @skb: the packet
1473 *
1474 * Send a packet through an SGE control Tx queue. Packets sent through
1475 * a control queue must fit entirely as immediate data in a single Tx
1476 * descriptor and have no page fragments.
1477 */
1478 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1479 struct sk_buff *skb)
1480 {
1481 int ret;
1482 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1483
1484 if (unlikely(!immediate(skb))) {
1485 WARN_ON(1);
1486 dev_kfree_skb(skb);
1487 return NET_XMIT_SUCCESS;
1488 }
1489
1490 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1491 wrp->wr_lo = htonl(V_WR_TID(q->token));
1492
1493 spin_lock(&q->lock);
1494 again:reclaim_completed_tx_imm(q);
1495
1496 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1497 if (unlikely(ret)) {
1498 if (ret == 1) {
1499 spin_unlock(&q->lock);
1500 return NET_XMIT_CN;
1501 }
1502 goto again;
1503 }
1504
1505 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1506
1507 q->in_use++;
1508 if (++q->pidx >= q->size) {
1509 q->pidx = 0;
1510 q->gen ^= 1;
1511 }
1512 spin_unlock(&q->lock);
1513 wmb();
1514 t3_write_reg(adap, A_SG_KDOORBELL,
1515 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1516 return NET_XMIT_SUCCESS;
1517 }
1518
1519 /**
1520 * restart_ctrlq - restart a suspended control queue
1521 * @w: pointer to the work associated with this handler
1522 *
1523 * Resumes transmission on a suspended Tx control queue.
1524 */
1525 static void restart_ctrlq(struct work_struct *w)
1526 {
1527 struct sk_buff *skb;
1528 struct sge_qset *qs = container_of(w, struct sge_qset,
1529 txq[TXQ_CTRL].qresume_task);
1530 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1531
1532 spin_lock(&q->lock);
1533 again:reclaim_completed_tx_imm(q);
1534
1535 while (q->in_use < q->size &&
1536 (skb = __skb_dequeue(&q->sendq)) != NULL) {
1537
1538 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1539
1540 if (++q->pidx >= q->size) {
1541 q->pidx = 0;
1542 q->gen ^= 1;
1543 }
1544 q->in_use++;
1545 }
1546
1547 if (!skb_queue_empty(&q->sendq)) {
1548 set_bit(TXQ_CTRL, &qs->txq_stopped);
1549 smp_mb__after_atomic();
1550
1551 if (should_restart_tx(q) &&
1552 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1553 goto again;
1554 q->stops++;
1555 }
1556
1557 spin_unlock(&q->lock);
1558 wmb();
1559 t3_write_reg(qs->adap, A_SG_KDOORBELL,
1560 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1561 }
1562
1563 /*
1564 * Send a management message through control queue 0
1565 */
1566 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1567 {
1568 int ret;
1569 local_bh_disable();
1570 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1571 local_bh_enable();
1572
1573 return ret;
1574 }
1575
1576 /**
1577 * deferred_unmap_destructor - unmap a packet when it is freed
1578 * @skb: the packet
1579 *
1580 * This is the packet destructor used for Tx packets that need to remain
1581 * mapped until they are freed rather than until their Tx descriptors are
1582 * freed.
1583 */
1584 static void deferred_unmap_destructor(struct sk_buff *skb)
1585 {
1586 int i;
1587 const dma_addr_t *p;
1588 const struct skb_shared_info *si;
1589 const struct deferred_unmap_info *dui;
1590
1591 dui = (struct deferred_unmap_info *)skb->head;
1592 p = dui->addr;
1593
1594 if (skb_tail_pointer(skb) - skb_transport_header(skb))
1595 dma_unmap_single(&dui->pdev->dev, *p++,
1596 skb_tail_pointer(skb) - skb_transport_header(skb),
1597 DMA_TO_DEVICE);
1598
1599 si = skb_shinfo(skb);
1600 for (i = 0; i < si->nr_frags; i++)
1601 dma_unmap_page(&dui->pdev->dev, *p++,
1602 skb_frag_size(&si->frags[i]), DMA_TO_DEVICE);
1603 }
1604
1605 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1606 const struct sg_ent *sgl, int sgl_flits)
1607 {
1608 dma_addr_t *p;
1609 struct deferred_unmap_info *dui;
1610
1611 dui = (struct deferred_unmap_info *)skb->head;
1612 dui->pdev = pdev;
1613 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1614 *p++ = be64_to_cpu(sgl->addr[0]);
1615 *p++ = be64_to_cpu(sgl->addr[1]);
1616 }
1617 if (sgl_flits)
1618 *p = be64_to_cpu(sgl->addr[0]);
1619 }
1620
1621 /**
1622 * write_ofld_wr - write an offload work request
1623 * @adap: the adapter
1624 * @skb: the packet to send
1625 * @q: the Tx queue
1626 * @pidx: index of the first Tx descriptor to write
1627 * @gen: the generation value to use
1628 * @ndesc: number of descriptors the packet will occupy
1629 * @addr: the DMA addresses of the packet fragments
1630 *
1631 * Write an offload work request to send the supplied packet. The packet
1632 * data already carry the work request with most fields populated.
1633 */
1634 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1635 struct sge_txq *q, unsigned int pidx,
1636 unsigned int gen, unsigned int ndesc,
1637 const dma_addr_t *addr)
1638 {
1639 unsigned int sgl_flits, flits;
1640 struct work_request_hdr *from;
1641 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1642 struct tx_desc *d = &q->desc[pidx];
1643
1644 if (immediate(skb)) {
1645 q->sdesc[pidx].skb = NULL;
1646 write_imm(d, skb, skb->len, gen);
1647 return;
1648 }
1649
1650 /* Only TX_DATA builds SGLs */
1651
1652 from = (struct work_request_hdr *)skb->data;
1653 memcpy(&d->flit[1], &from[1],
1654 skb_transport_offset(skb) - sizeof(*from));
1655
1656 flits = skb_transport_offset(skb) / 8;
1657 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1658 sgl_flits = write_sgl(skb, sgp, skb_transport_header(skb),
1659 skb_tail_pointer(skb) - skb_transport_header(skb),
1660 addr);
1661 if (need_skb_unmap()) {
1662 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1663 skb->destructor = deferred_unmap_destructor;
1664 }
1665
1666 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1667 gen, from->wr_hi, from->wr_lo);
1668 }
1669
1670 /**
1671 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1672 * @skb: the packet
1673 *
1674 * Returns the number of Tx descriptors needed for the given offload
1675 * packet. These packets are already fully constructed.
1676 */
1677 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1678 {
1679 unsigned int flits, cnt;
1680
1681 if (skb->len <= WR_LEN)
1682 return 1; /* packet fits as immediate data */
1683
1684 flits = skb_transport_offset(skb) / 8; /* headers */
1685 cnt = skb_shinfo(skb)->nr_frags;
1686 if (skb_tail_pointer(skb) != skb_transport_header(skb))
1687 cnt++;
1688 return flits_to_desc(flits + sgl_len(cnt));
1689 }
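
/*
 * Illustration: an offload WR carrying 40 bytes of headers (5 flits) ahead of
 * two page fragments and a payload tail needs 5 + sgl_len(3) = 10 flits,
 * which again fits in a single descriptor.
 */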
1690
1691 /**
1692 * ofld_xmit - send a packet through an offload queue
1693 * @adap: the adapter
1694 * @q: the Tx offload queue
1695 * @skb: the packet
1696 *
1697 * Send an offload packet through an SGE offload queue.
1698 */
1699 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1700 struct sk_buff *skb)
1701 {
1702 int ret;
1703 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1704
1705 spin_lock(&q->lock);
1706 again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1707
1708 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1709 if (unlikely(ret)) {
1710 if (ret == 1) {
1711 skb->priority = ndesc; /* save for restart */
1712 spin_unlock(&q->lock);
1713 return NET_XMIT_CN;
1714 }
1715 goto again;
1716 }
1717
1718 if (!immediate(skb) &&
1719 map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) {
1720 spin_unlock(&q->lock);
1721 return NET_XMIT_SUCCESS;
1722 }
1723
1724 gen = q->gen;
1725 q->in_use += ndesc;
1726 pidx = q->pidx;
1727 q->pidx += ndesc;
1728 if (q->pidx >= q->size) {
1729 q->pidx -= q->size;
1730 q->gen ^= 1;
1731 }
1732 spin_unlock(&q->lock);
1733
1734 write_ofld_wr(adap, skb, q, pidx, gen, ndesc, (dma_addr_t *)skb->head);
1735 check_ring_tx_db(adap, q);
1736 return NET_XMIT_SUCCESS;
1737 }
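/*
 * Note: ofld_xmit() reuses the skb's headroom (skb->head) as scratch
 * space for the array of mapped DMA addresses, and write_ofld_wr() may
 * later overwrite it with a deferred_unmap_info.  This presumably
 * relies on the offload path constructing its skbs with enough
 * headroom reserved for that purpose.
 */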
1738
1739 /**
1740 * restart_offloadq - restart a suspended offload queue
1741 * @w: pointer to the work associated with this handler
1742 *
1743 * Resumes transmission on a suspended Tx offload queue.
1744 */
1745 static void restart_offloadq(struct work_struct *w)
1746 {
1747 struct sk_buff *skb;
1748 struct sge_qset *qs = container_of(w, struct sge_qset,
1749 txq[TXQ_OFLD].qresume_task);
1750 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1751 const struct port_info *pi = netdev_priv(qs->netdev);
1752 struct adapter *adap = pi->adapter;
1753 unsigned int written = 0;
1754
1755 spin_lock(&q->lock);
1756 again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1757
1758 while ((skb = skb_peek(&q->sendq)) != NULL) {
1759 unsigned int gen, pidx;
1760 unsigned int ndesc = skb->priority;
1761
1762 if (unlikely(q->size - q->in_use < ndesc)) {
1763 set_bit(TXQ_OFLD, &qs->txq_stopped);
1764 smp_mb__after_atomic();
1765
1766 if (should_restart_tx(q) &&
1767 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1768 goto again;
1769 q->stops++;
1770 break;
1771 }
1772
1773 if (!immediate(skb) &&
1774 map_skb(adap->pdev, skb, (dma_addr_t *)skb->head))
1775 break;
1776
1777 gen = q->gen;
1778 q->in_use += ndesc;
1779 pidx = q->pidx;
1780 q->pidx += ndesc;
1781 written += ndesc;
1782 if (q->pidx >= q->size) {
1783 q->pidx -= q->size;
1784 q->gen ^= 1;
1785 }
1786 __skb_unlink(skb, &q->sendq);
1787 spin_unlock(&q->lock);
1788
1789 write_ofld_wr(adap, skb, q, pidx, gen, ndesc,
1790 (dma_addr_t *)skb->head);
1791 spin_lock(&q->lock);
1792 }
1793 spin_unlock(&q->lock);
1794
1795 #if USE_GTS
1796 set_bit(TXQ_RUNNING, &q->flags);
1797 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1798 #endif
1799 wmb();
1800 if (likely(written))
1801 t3_write_reg(adap, A_SG_KDOORBELL,
1802 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1803 }
1804
1805 /**
1806 * queue_set - return the queue set a packet should use
1807 * @skb: the packet
1808 *
1809 * Maps a packet to the SGE queue set it should use. The desired queue
1810 * set is carried in bits 1-3 in the packet's priority.
1811 */
1812 static inline int queue_set(const struct sk_buff *skb)
1813 {
1814 return skb->priority >> 1;
1815 }
1816
1817 /**
1818 * is_ctrl_pkt - return whether an offload packet is a control packet
1819 * @skb: the packet
1820 *
1821 * Determines whether an offload packet should use an OFLD or a CTRL
1822 * Tx queue. This is indicated by bit 0 in the packet's priority.
1823 */
1824 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1825 {
1826 return skb->priority & 1;
1827 }
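/*
 * Illustrative encoding (a sketch, not a helper provided by this file):
 * an offload sender that wants queue set 2 and the regular, non-control
 * queue would set
 *
 *	skb->priority = (2 << 1) | 0;
 *
 * so that queue_set() returns 2 and is_ctrl_pkt() returns 0.
 */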
1828
1829 /**
1830 * t3_offload_tx - send an offload packet
1831 * @tdev: the offload device to send to
1832 * @skb: the packet
1833 *
1834 * Sends an offload packet. We use the packet priority to select the
1835 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1836 * should be sent as regular or control, bits 1-3 select the queue set.
1837 */
1838 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1839 {
1840 struct adapter *adap = tdev2adap(tdev);
1841 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1842
1843 if (unlikely(is_ctrl_pkt(skb)))
1844 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1845
1846 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1847 }
1848
1849 /**
1850 * offload_enqueue - add an offload packet to an SGE offload receive queue
1851 * @q: the SGE response queue
1852 * @skb: the packet
1853 *
1854 * Add a new offload packet to an SGE response queue's offload packet
1855 * queue. If the packet is the first on the queue it schedules the RX
1856 * softirq to process the queue.
1857 */
1858 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1859 {
1860 int was_empty = skb_queue_empty(&q->rx_queue);
1861
1862 __skb_queue_tail(&q->rx_queue, skb);
1863
1864 if (was_empty) {
1865 struct sge_qset *qs = rspq_to_qset(q);
1866
1867 napi_schedule(&qs->napi);
1868 }
1869 }
1870
1871 /**
1872 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1873 * @tdev: the offload device that will be receiving the packets
1874 * @q: the SGE response queue that assembled the bundle
1875 * @skbs: the partial bundle
1876 * @n: the number of packets in the bundle
1877 *
1878 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1879 */
1880 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1881 struct sge_rspq *q,
1882 struct sk_buff *skbs[], int n)
1883 {
1884 if (n) {
1885 q->offload_bundles++;
1886 tdev->recv(tdev, skbs, n);
1887 }
1888 }
1889
1890 /**
1891 * ofld_poll - NAPI handler for offload packets in interrupt mode
1892 * @napi: the network device doing the polling
1893 * @budget: polling budget
1894 *
1895 * The NAPI handler for offload packets when a response queue is serviced
1896 * by the hard interrupt handler, i.e., when it's operating in non-polling
1897 * mode. Creates small packet batches and sends them through the offload
1898 * receive handler. Batches need to be of modest size as we do prefetches
1899 * on the packets in each.
1900 */
1901 static int ofld_poll(struct napi_struct *napi, int budget)
1902 {
1903 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1904 struct sge_rspq *q = &qs->rspq;
1905 struct adapter *adapter = qs->adap;
1906 int work_done = 0;
1907
1908 while (work_done < budget) {
1909 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1910 struct sk_buff_head queue;
1911 int ngathered;
1912
1913 spin_lock_irq(&q->lock);
1914 __skb_queue_head_init(&queue);
1915 skb_queue_splice_init(&q->rx_queue, &queue);
1916 if (skb_queue_empty(&queue)) {
1917 napi_complete_done(napi, work_done);
1918 spin_unlock_irq(&q->lock);
1919 return work_done;
1920 }
1921 spin_unlock_irq(&q->lock);
1922
1923 ngathered = 0;
1924 skb_queue_walk_safe(&queue, skb, tmp) {
1925 if (work_done >= budget)
1926 break;
1927 work_done++;
1928
1929 __skb_unlink(skb, &queue);
1930 prefetch(skb->data);
1931 skbs[ngathered] = skb;
1932 if (++ngathered == RX_BUNDLE_SIZE) {
1933 q->offload_bundles++;
1934 adapter->tdev.recv(&adapter->tdev, skbs,
1935 ngathered);
1936 ngathered = 0;
1937 }
1938 }
1939 if (!skb_queue_empty(&queue)) {
1940 /* splice remaining packets back onto Rx queue */
1941 spin_lock_irq(&q->lock);
1942 skb_queue_splice(&queue, &q->rx_queue);
1943 spin_unlock_irq(&q->lock);
1944 }
1945 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1946 }
1947
1948 return work_done;
1949 }
1950
1951 /**
1952 * rx_offload - process a received offload packet
1953 * @tdev: the offload device receiving the packet
1954 * @rq: the response queue that received the packet
1955 * @skb: the packet
1956 * @rx_gather: a gather list of packets if we are building a bundle
1957 * @gather_idx: index of the next available slot in the bundle
1958 *
1959 * Process an ingress offload packet and add it to the offload ingress
1960 * queue. Returns the index of the next available slot in the bundle.
1961 */
1962 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1963 struct sk_buff *skb, struct sk_buff *rx_gather[],
1964 unsigned int gather_idx)
1965 {
1966 skb_reset_mac_header(skb);
1967 skb_reset_network_header(skb);
1968 skb_reset_transport_header(skb);
1969
1970 if (rq->polling) {
1971 rx_gather[gather_idx++] = skb;
1972 if (gather_idx == RX_BUNDLE_SIZE) {
1973 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1974 gather_idx = 0;
1975 rq->offload_bundles++;
1976 }
1977 } else
1978 offload_enqueue(rq, skb);
1979
1980 return gather_idx;
1981 }
1982
1983 /**
1984 * restart_tx - check whether to restart suspended Tx queues
1985 * @qs: the queue set to resume
1986 *
1987 * Restarts suspended Tx queues of an SGE queue set if they have enough
1988 * free resources to resume operation.
1989 */
1990 static void restart_tx(struct sge_qset *qs)
1991 {
1992 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1993 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1994 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1995 qs->txq[TXQ_ETH].restarts++;
1996 if (netif_running(qs->netdev))
1997 netif_tx_wake_queue(qs->tx_q);
1998 }
1999
2000 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
2001 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2002 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2003 qs->txq[TXQ_OFLD].restarts++;
2004
2005 /* The work can be quite lengthy so we use the driver's own workqueue */
2006 queue_work(cxgb3_wq, &qs->txq[TXQ_OFLD].qresume_task);
2007 }
2008 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
2009 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2010 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2011 qs->txq[TXQ_CTRL].restarts++;
2012
2013 /* The work can be quite lengthy so we use the driver's own workqueue */
2014 queue_work(cxgb3_wq, &qs->txq[TXQ_CTRL].qresume_task);
2015 }
2016 }
2017
2018 /**
2019 * cxgb3_arp_process - process an ARP request probing a private IP address
2020 * @pi: the port info
2021 * @skb: the skbuff containing the ARP request
2022 *
2023 * Check if the ARP request is probing the private IP address
2024 * dedicated to iSCSI, generate an ARP reply if so.
2025 */
2026 static void cxgb3_arp_process(struct port_info *pi, struct sk_buff *skb)
2027 {
2028 struct net_device *dev = skb->dev;
2029 struct arphdr *arp;
2030 unsigned char *arp_ptr;
2031 unsigned char *sha;
2032 __be32 sip, tip;
2033
2034 if (!dev)
2035 return;
2036
2037 skb_reset_network_header(skb);
2038 arp = arp_hdr(skb);
2039
2040 if (arp->ar_op != htons(ARPOP_REQUEST))
2041 return;
2042
2043 arp_ptr = (unsigned char *)(arp + 1);
2044 sha = arp_ptr;
2045 arp_ptr += dev->addr_len;
2046 memcpy(&sip, arp_ptr, sizeof(sip));
2047 arp_ptr += sizeof(sip);
2048 arp_ptr += dev->addr_len;
2049 memcpy(&tip, arp_ptr, sizeof(tip));
2050
2051 if (tip != pi->iscsi_ipv4addr)
2052 return;
2053
2054 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
2055 pi->iscsic.mac_addr, sha);
2056
2057 }
2058
2059 static inline int is_arp(struct sk_buff *skb)
2060 {
2061 return skb->protocol == htons(ETH_P_ARP);
2062 }
2063
2064 static void cxgb3_process_iscsi_prov_pack(struct port_info *pi,
2065 struct sk_buff *skb)
2066 {
2067 if (is_arp(skb)) {
2068 cxgb3_arp_process(pi, skb);
2069 return;
2070 }
2071
2072 if (pi->iscsic.recv)
2073 pi->iscsic.recv(pi, skb);
2074
2075 }
2076
2077 /**
2078 * rx_eth - process an ingress ethernet packet
2079 * @adap: the adapter
2080 * @rq: the response queue that received the packet
2081 * @skb: the packet
2082 * @pad: padding
2083 * @lro: large receive offload
2084 *
2085 * Process an ingress ethernet packet and deliver it to the stack.
2086 * The padding is 2 if the packet was delivered in an Rx buffer and 0
2087 * if it was immediate data in a response.
2088 */
2089 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
2090 struct sk_buff *skb, int pad, int lro)
2091 {
2092 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
2093 struct sge_qset *qs = rspq_to_qset(rq);
2094 struct port_info *pi;
2095
2096 skb_pull(skb, sizeof(*p) + pad);
2097 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
2098 pi = netdev_priv(skb->dev);
2099 if ((skb->dev->features & NETIF_F_RXCSUM) && p->csum_valid &&
2100 p->csum == htons(0xffff) && !p->fragment) {
2101 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2102 skb->ip_summed = CHECKSUM_UNNECESSARY;
2103 } else
2104 skb_checksum_none_assert(skb);
2105 skb_record_rx_queue(skb, qs - &adap->sge.qs[pi->first_qset]);
2106
2107 if (p->vlan_valid) {
2108 qs->port_stats[SGE_PSTAT_VLANEX]++;
2109 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(p->vlan));
2110 }
2111 if (rq->polling) {
2112 if (lro)
2113 napi_gro_receive(&qs->napi, skb);
2114 else {
2115 if (unlikely(pi->iscsic.flags))
2116 cxgb3_process_iscsi_prov_pack(pi, skb);
2117 netif_receive_skb(skb);
2118 }
2119 } else
2120 netif_rx(skb);
2121 }
2122
2123 static inline int is_eth_tcp(u32 rss)
2124 {
2125 return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2126 }
2127
2128 /**
2129 * lro_add_page - add a page chunk to an LRO session
2130 * @adap: the adapter
2131 * @qs: the associated queue set
2132 * @fl: the free list containing the page chunk to add
2133 * @len: packet length
2134 * @complete: indicates the last fragment of a frame
2135 *
2136 * Add a received packet contained in a page chunk to an existing LRO
2137 * session.
2138 */
2139 static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2140 struct sge_fl *fl, int len, int complete)
2141 {
2142 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2143 struct port_info *pi = netdev_priv(qs->netdev);
2144 struct sk_buff *skb = NULL;
2145 struct cpl_rx_pkt *cpl;
2146 skb_frag_t *rx_frag;
2147 int nr_frags;
2148 int offset = 0;
2149
2150 if (!qs->nomem) {
2151 skb = napi_get_frags(&qs->napi);
2152 qs->nomem = !skb;
2153 }
2154
2155 fl->credits--;
2156
2157 dma_sync_single_for_cpu(&adap->pdev->dev,
2158 dma_unmap_addr(sd, dma_addr),
2159 fl->buf_size - SGE_PG_RSVD, DMA_FROM_DEVICE);
2160
2161 (*sd->pg_chunk.p_cnt)--;
2162 if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
2163 dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping,
2164 fl->alloc_size, DMA_FROM_DEVICE);
2165
2166 if (!skb) {
2167 put_page(sd->pg_chunk.page);
2168 if (complete)
2169 qs->nomem = 0;
2170 return;
2171 }
2172
2173 rx_frag = skb_shinfo(skb)->frags;
2174 nr_frags = skb_shinfo(skb)->nr_frags;
2175
2176 if (!nr_frags) {
2177 offset = 2 + sizeof(struct cpl_rx_pkt);
2178 cpl = qs->lro_va = sd->pg_chunk.va + 2;
2179
2180 if ((qs->netdev->features & NETIF_F_RXCSUM) &&
2181 cpl->csum_valid && cpl->csum == htons(0xffff)) {
2182 skb->ip_summed = CHECKSUM_UNNECESSARY;
2183 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2184 } else
2185 skb->ip_summed = CHECKSUM_NONE;
2186 } else
2187 cpl = qs->lro_va;
2188
2189 len -= offset;
2190
2191 rx_frag += nr_frags;
2192 __skb_frag_set_page(rx_frag, sd->pg_chunk.page);
2193 skb_frag_off_set(rx_frag, sd->pg_chunk.offset + offset);
2194 skb_frag_size_set(rx_frag, len);
2195
2196 skb->len += len;
2197 skb->data_len += len;
2198 skb->truesize += len;
2199 skb_shinfo(skb)->nr_frags++;
2200
2201 if (!complete)
2202 return;
2203
2204 skb_record_rx_queue(skb, qs - &adap->sge.qs[pi->first_qset]);
2205
2206 if (cpl->vlan_valid) {
2207 qs->port_stats[SGE_PSTAT_VLANEX]++;
2208 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
2209 }
2210 napi_gro_frags(&qs->napi);
2211 }
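/*
 * Note: qs->lro_va caches the CPL header found in the first page chunk
 * of a frame; subsequent chunks of the same frame carry no CPL of their
 * own, so the cached pointer is what supplies the VLAN information when
 * the frame completes above.
 */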
2212
2213 /**
2214 * handle_rsp_cntrl_info - handles control information in a response
2215 * @qs: the queue set corresponding to the response
2216 * @flags: the response control flags
2217 *
2218 * Handles the control information of an SGE response, such as GTS
2219 * indications and completion credits for the queue set's Tx queues.
2220 * HW coalesces credits; we don't do any extra SW coalescing.
2221 */
2222 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2223 {
2224 unsigned int credits;
2225
2226 #if USE_GTS
2227 if (flags & F_RSPD_TXQ0_GTS)
2228 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2229 #endif
2230
2231 credits = G_RSPD_TXQ0_CR(flags);
2232 if (credits)
2233 qs->txq[TXQ_ETH].processed += credits;
2234
2235 credits = G_RSPD_TXQ2_CR(flags);
2236 if (credits)
2237 qs->txq[TXQ_CTRL].processed += credits;
2238
2239 # if USE_GTS
2240 if (flags & F_RSPD_TXQ1_GTS)
2241 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2242 # endif
2243 credits = G_RSPD_TXQ1_CR(flags);
2244 if (credits)
2245 qs->txq[TXQ_OFLD].processed += credits;
2246 }
2247
2248 /**
2249 * check_ring_db - check if we need to ring any doorbells
2250 * @adap: the adapter
2251 * @qs: the queue set whose Tx queues are to be examined
2252 * @sleeping: indicates which Tx queue sent GTS
2253 *
2254 * Checks if some of a queue set's Tx queues need to ring their doorbells
2255 * to resume transmission after idling while they still have unprocessed
2256 * descriptors.
2257 */
2258 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2259 unsigned int sleeping)
2260 {
2261 if (sleeping & F_RSPD_TXQ0_GTS) {
2262 struct sge_txq *txq = &qs->txq[TXQ_ETH];
2263
2264 if (txq->cleaned + txq->in_use != txq->processed &&
2265 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2266 set_bit(TXQ_RUNNING, &txq->flags);
2267 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2268 V_EGRCNTX(txq->cntxt_id));
2269 }
2270 }
2271
2272 if (sleeping & F_RSPD_TXQ1_GTS) {
2273 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2274
2275 if (txq->cleaned + txq->in_use != txq->processed &&
2276 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2277 set_bit(TXQ_RUNNING, &txq->flags);
2278 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2279 V_EGRCNTX(txq->cntxt_id));
2280 }
2281 }
2282 }
2283
2284 /**
2285 * is_new_response - check if a response is newly written
2286 * @r: the response descriptor
2287 * @q: the response queue
2288 *
2289 * Returns true if a response descriptor contains a yet unprocessed
2290 * response.
2291 */
2292 static inline int is_new_response(const struct rsp_desc *r,
2293 const struct sge_rspq *q)
2294 {
2295 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2296 }
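/*
 * The generation check works because the queue's gen bit is toggled
 * each time the response ring wraps (see process_responses()), so
 * descriptors left over from the previous pass around the ring carry
 * the stale generation value and are not mistaken for new responses.
 */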
2297
2298 static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2299 {
2300 q->pg_skb = NULL;
2301 q->rx_recycle_buf = 0;
2302 }
2303
2304 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2305 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2306 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2307 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2308 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2309
2310 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2311 #define NOMEM_INTR_DELAY 2500
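/* i.e. 2500 * 0.1us = 250us between interrupts while memory is tight */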
2312
2313 /**
2314 * process_responses - process responses from an SGE response queue
2315 * @adap: the adapter
2316 * @qs: the queue set to which the response queue belongs
2317 * @budget: how many responses can be processed in this round
2318 *
2319 * Process responses from an SGE response queue up to the supplied budget.
2320 * Responses include received packets as well as credits and other events
2321 * for the queues that belong to the response queue's queue set.
2322 * A negative budget is effectively unlimited.
2323 *
2324 * Additionally choose the interrupt holdoff time for the next interrupt
2325 * on this queue. If the system is under memory shortage use a fairly
2326 * long delay to help recovery.
2327 */
2328 static int process_responses(struct adapter *adap, struct sge_qset *qs,
2329 int budget)
2330 {
2331 struct sge_rspq *q = &qs->rspq;
2332 struct rsp_desc *r = &q->desc[q->cidx];
2333 int budget_left = budget;
2334 unsigned int sleeping = 0;
2335 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2336 int ngathered = 0;
2337
2338 q->next_holdoff = q->holdoff_tmr;
2339
2340 while (likely(budget_left && is_new_response(r, q))) {
2341 int packet_complete, eth, ethpad = 2;
2342 int lro = !!(qs->netdev->features & NETIF_F_GRO);
2343 struct sk_buff *skb = NULL;
2344 u32 len, flags;
2345 __be32 rss_hi, rss_lo;
2346
2347 dma_rmb();
2348 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2349 rss_hi = *(const __be32 *)r;
2350 rss_lo = r->rss_hdr.rss_hash_val;
2351 flags = ntohl(r->flags);
2352
2353 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2354 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2355 if (!skb)
2356 goto no_mem;
2357
2358 __skb_put_data(skb, r, AN_PKT_SIZE);
2359 skb->data[0] = CPL_ASYNC_NOTIF;
2360 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2361 q->async_notif++;
2362 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2363 skb = get_imm_packet(r);
2364 if (unlikely(!skb)) {
2365 no_mem:
2366 q->next_holdoff = NOMEM_INTR_DELAY;
2367 q->nomem++;
2368 /* consume one credit since we tried */
2369 budget_left--;
2370 break;
2371 }
2372 q->imm_data++;
2373 ethpad = 0;
2374 } else if ((len = ntohl(r->len_cq)) != 0) {
2375 struct sge_fl *fl;
2376
2377 lro &= eth && is_eth_tcp(rss_hi);
2378
2379 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2380 if (fl->use_pages) {
2381 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2382
2383 net_prefetch(addr);
2384 __refill_fl(adap, fl);
2385 if (lro > 0) {
2386 lro_add_page(adap, qs, fl,
2387 G_RSPD_LEN(len),
2388 flags & F_RSPD_EOP);
2389 goto next_fl;
2390 }
2391
2392 skb = get_packet_pg(adap, fl, q,
2393 G_RSPD_LEN(len),
2394 eth ?
2395 SGE_RX_DROP_THRES : 0);
2396 q->pg_skb = skb;
2397 } else
2398 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2399 eth ? SGE_RX_DROP_THRES : 0);
2400 if (unlikely(!skb)) {
2401 if (!eth)
2402 goto no_mem;
2403 q->rx_drops++;
2404 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2405 __skb_pull(skb, 2);
2406 next_fl:
2407 if (++fl->cidx == fl->size)
2408 fl->cidx = 0;
2409 } else
2410 q->pure_rsps++;
2411
2412 if (flags & RSPD_CTRL_MASK) {
2413 sleeping |= flags & RSPD_GTS_MASK;
2414 handle_rsp_cntrl_info(qs, flags);
2415 }
2416
2417 r++;
2418 if (unlikely(++q->cidx == q->size)) {
2419 q->cidx = 0;
2420 q->gen ^= 1;
2421 r = q->desc;
2422 }
2423 prefetch(r);
2424
2425 if (++q->credits >= (q->size / 4)) {
2426 refill_rspq(adap, q, q->credits);
2427 q->credits = 0;
2428 }
2429
2430 packet_complete = flags &
2431 (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2432 F_RSPD_ASYNC_NOTIF);
2433
2434 if (skb != NULL && packet_complete) {
2435 if (eth)
2436 rx_eth(adap, q, skb, ethpad, lro);
2437 else {
2438 q->offload_pkts++;
2439 /* Preserve the RSS info in csum & priority */
2440 skb->csum = rss_hi;
2441 skb->priority = rss_lo;
2442 ngathered = rx_offload(&adap->tdev, q, skb,
2443 offload_skbs,
2444 ngathered);
2445 }
2446
2447 if (flags & F_RSPD_EOP)
2448 clear_rspq_bufstate(q);
2449 }
2450 --budget_left;
2451 }
2452
2453 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2454
2455 if (sleeping)
2456 check_ring_db(adap, qs, sleeping);
2457
2458 smp_mb(); /* commit Tx queue .processed updates */
2459 if (unlikely(qs->txq_stopped != 0))
2460 restart_tx(qs);
2461
2462 budget -= budget_left;
2463 return budget;
2464 }
2465
2466 static inline int is_pure_response(const struct rsp_desc *r)
2467 {
2468 __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2469
2470 return (n | r->len_cq) == 0;
2471 }
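/*
 * A response is "pure" when it signals no data at all: no async
 * notification, no immediate data (both masked out of r->flags above)
 * and a zero len_cq, meaning no free-list buffer was consumed.  OR-ing
 * the two fields lets one comparison against zero cover all three cases.
 */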
2472
2473 /**
2474 * napi_rx_handler - the NAPI handler for Rx processing
2475 * @napi: the napi instance
2476 * @budget: how many packets we can process in this round
2477 *
2478 * Handler for new data events when using NAPI.
2479 */
2480 static int napi_rx_handler(struct napi_struct *napi, int budget)
2481 {
2482 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2483 struct adapter *adap = qs->adap;
2484 int work_done = process_responses(adap, qs, budget);
2485
2486 if (likely(work_done < budget)) {
2487 napi_complete_done(napi, work_done);
2488
2489 /*
2490 * Because we don't atomically flush the following
2491 * write it is possible that in very rare cases it can
2492 * reach the device in a way that races with a new
2493 * response being written plus an error interrupt
2494 * causing the NAPI interrupt handler below to return
2495 * unhandled status to the OS. To protect against
2496 * this would require flushing the write and doing
2497 * both the write and the flush with interrupts off.
2498 * Way too expensive and unjustifiable given the
2499 * rarity of the race.
2500 *
2501 * The race cannot happen at all with MSI-X.
2502 */
2503 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2504 V_NEWTIMER(qs->rspq.next_holdoff) |
2505 V_NEWINDEX(qs->rspq.cidx));
2506 }
2507 return work_done;
2508 }
2509
2510 /*
2511 * Returns true if the device is already scheduled for polling.
2512 */
2513 static inline int napi_is_scheduled(struct napi_struct *napi)
2514 {
2515 return test_bit(NAPI_STATE_SCHED, &napi->state);
2516 }
2517
2518 /**
2519 * process_pure_responses - process pure responses from a response queue
2520 * @adap: the adapter
2521 * @qs: the queue set owning the response queue
2522 * @r: the first pure response to process
2523 *
2524 * A simpler version of process_responses() that handles only pure (i.e.,
2525 * non data-carrying) responses. Such responses are too light-weight to
2526 * justify calling a softirq under NAPI, so we handle them specially in
2527 * the interrupt handler. The function is called with a pointer to a
2528 * response, which the caller must ensure is a valid pure response.
2529 *
2530 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2531 */
2532 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2533 struct rsp_desc *r)
2534 {
2535 struct sge_rspq *q = &qs->rspq;
2536 unsigned int sleeping = 0;
2537
2538 do {
2539 u32 flags = ntohl(r->flags);
2540
2541 r++;
2542 if (unlikely(++q->cidx == q->size)) {
2543 q->cidx = 0;
2544 q->gen ^= 1;
2545 r = q->desc;
2546 }
2547 prefetch(r);
2548
2549 if (flags & RSPD_CTRL_MASK) {
2550 sleeping |= flags & RSPD_GTS_MASK;
2551 handle_rsp_cntrl_info(qs, flags);
2552 }
2553
2554 q->pure_rsps++;
2555 if (++q->credits >= (q->size / 4)) {
2556 refill_rspq(adap, q, q->credits);
2557 q->credits = 0;
2558 }
2559 if (!is_new_response(r, q))
2560 break;
2561 dma_rmb();
2562 } while (is_pure_response(r));
2563
2564 if (sleeping)
2565 check_ring_db(adap, qs, sleeping);
2566
2567 smp_mb(); /* commit Tx queue .processed updates */
2568 if (unlikely(qs->txq_stopped != 0))
2569 restart_tx(qs);
2570
2571 return is_new_response(r, q);
2572 }
2573
2574 /**
2575 * handle_responses - decide what to do with new responses in NAPI mode
2576 * @adap: the adapter
2577 * @q: the response queue
2578 *
2579 * This is used by the NAPI interrupt handlers to decide what to do with
2580 * new SGE responses. If there are no new responses it returns -1. If
2581 * there are new responses and they are pure (i.e., non-data carrying)
2582 * it handles them straight in hard interrupt context as they are very
2583 * cheap and don't deliver any packets. Finally, if there are any data
2584 * signaling responses it schedules the NAPI handler. Returns 1 if it
2585 * schedules NAPI, 0 if all new responses were pure.
2586 *
2587 * The caller must ascertain NAPI is not already running.
2588 */
2589 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2590 {
2591 struct sge_qset *qs = rspq_to_qset(q);
2592 struct rsp_desc *r = &q->desc[q->cidx];
2593
2594 if (!is_new_response(r, q))
2595 return -1;
2596 dma_rmb();
2597 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2598 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2599 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2600 return 0;
2601 }
2602 napi_schedule(&qs->napi);
2603 return 1;
2604 }
2605
2606 /*
2607 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2608 * (i.e., response queue serviced in hard interrupt).
2609 */
2610 static irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2611 {
2612 struct sge_qset *qs = cookie;
2613 struct adapter *adap = qs->adap;
2614 struct sge_rspq *q = &qs->rspq;
2615
2616 spin_lock(&q->lock);
2617 if (process_responses(adap, qs, -1) == 0)
2618 q->unhandled_irqs++;
2619 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2620 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2621 spin_unlock(&q->lock);
2622 return IRQ_HANDLED;
2623 }
2624
2625 /*
2626 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2627 * (i.e., response queue serviced by NAPI polling).
2628 */
2629 static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2630 {
2631 struct sge_qset *qs = cookie;
2632 struct sge_rspq *q = &qs->rspq;
2633
2634 spin_lock(&q->lock);
2635
2636 if (handle_responses(qs->adap, q) < 0)
2637 q->unhandled_irqs++;
2638 spin_unlock(&q->lock);
2639 return IRQ_HANDLED;
2640 }
2641
2642 /*
2643 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2644 * SGE response queues as well as error and other async events as they all use
2645 * the same MSI vector. We use one SGE response queue per port in this mode
2646 * and protect all response queues with queue 0's lock.
2647 */
2648 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2649 {
2650 int new_packets = 0;
2651 struct adapter *adap = cookie;
2652 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2653
2654 spin_lock(&q->lock);
2655
2656 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2657 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2658 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2659 new_packets = 1;
2660 }
2661
2662 if (adap->params.nports == 2 &&
2663 process_responses(adap, &adap->sge.qs[1], -1)) {
2664 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2665
2666 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2667 V_NEWTIMER(q1->next_holdoff) |
2668 V_NEWINDEX(q1->cidx));
2669 new_packets = 1;
2670 }
2671
2672 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2673 q->unhandled_irqs++;
2674
2675 spin_unlock(&q->lock);
2676 return IRQ_HANDLED;
2677 }
2678
2679 static int rspq_check_napi(struct sge_qset *qs)
2680 {
2681 struct sge_rspq *q = &qs->rspq;
2682
2683 if (!napi_is_scheduled(&qs->napi) &&
2684 is_new_response(&q->desc[q->cidx], q)) {
2685 napi_schedule(&qs->napi);
2686 return 1;
2687 }
2688 return 0;
2689 }
2690
2691 /*
2692 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2693 * by NAPI polling). Handles data events from SGE response queues as well as
2694 * error and other async events as they all use the same MSI vector. We use
2695 * one SGE response queue per port in this mode and protect all response
2696 * queues with queue 0's lock.
2697 */
2698 static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2699 {
2700 int new_packets;
2701 struct adapter *adap = cookie;
2702 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2703
2704 spin_lock(&q->lock);
2705
2706 new_packets = rspq_check_napi(&adap->sge.qs[0]);
2707 if (adap->params.nports == 2)
2708 new_packets += rspq_check_napi(&adap->sge.qs[1]);
2709 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2710 q->unhandled_irqs++;
2711
2712 spin_unlock(&q->lock);
2713 return IRQ_HANDLED;
2714 }
2715
2716 /*
2717 * A helper function that processes responses and issues GTS.
2718 */
2719 static inline int process_responses_gts(struct adapter *adap,
2720 struct sge_rspq *rq)
2721 {
2722 int work;
2723
2724 work = process_responses(adap, rspq_to_qset(rq), -1);
2725 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2726 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2727 return work;
2728 }
2729
2730 /*
2731 * The legacy INTx interrupt handler. This needs to handle data events from
2732 * SGE response queues as well as error and other async events as they all use
2733 * the same interrupt pin. We use one SGE response queue per port in this mode
2734 * and protect all response queues with queue 0's lock.
2735 */
2736 static irqreturn_t t3_intr(int irq, void *cookie)
2737 {
2738 int work_done, w0, w1;
2739 struct adapter *adap = cookie;
2740 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2741 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2742
2743 spin_lock(&q0->lock);
2744
2745 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2746 w1 = adap->params.nports == 2 &&
2747 is_new_response(&q1->desc[q1->cidx], q1);
2748
2749 if (likely(w0 | w1)) {
2750 t3_write_reg(adap, A_PL_CLI, 0);
2751 t3_read_reg(adap, A_PL_CLI); /* flush */
2752
2753 if (likely(w0))
2754 process_responses_gts(adap, q0);
2755
2756 if (w1)
2757 process_responses_gts(adap, q1);
2758
2759 work_done = w0 | w1;
2760 } else
2761 work_done = t3_slow_intr_handler(adap);
2762
2763 spin_unlock(&q0->lock);
2764 return IRQ_RETVAL(work_done != 0);
2765 }
2766
2767 /*
2768 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2769 * Handles data events from SGE response queues as well as error and other
2770 * async events as they all use the same interrupt pin. We use one SGE
2771 * response queue per port in this mode and protect all response queues with
2772 * queue 0's lock.
2773 */
2774 static irqreturn_t t3b_intr(int irq, void *cookie)
2775 {
2776 u32 map;
2777 struct adapter *adap = cookie;
2778 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2779
2780 t3_write_reg(adap, A_PL_CLI, 0);
2781 map = t3_read_reg(adap, A_SG_DATA_INTR);
2782
2783 if (unlikely(!map)) /* shared interrupt, most likely */
2784 return IRQ_NONE;
2785
2786 spin_lock(&q0->lock);
2787
2788 if (unlikely(map & F_ERRINTR))
2789 t3_slow_intr_handler(adap);
2790
2791 if (likely(map & 1))
2792 process_responses_gts(adap, q0);
2793
2794 if (map & 2)
2795 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2796
2797 spin_unlock(&q0->lock);
2798 return IRQ_HANDLED;
2799 }
2800
2801 /*
2802 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2803 * Handles data events from SGE response queues as well as error and other
2804 * async events as they all use the same interrupt pin. We use one SGE
2805 * response queue per port in this mode and protect all response queues with
2806 * queue 0's lock.
2807 */
2808 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2809 {
2810 u32 map;
2811 struct adapter *adap = cookie;
2812 struct sge_qset *qs0 = &adap->sge.qs[0];
2813 struct sge_rspq *q0 = &qs0->rspq;
2814
2815 t3_write_reg(adap, A_PL_CLI, 0);
2816 map = t3_read_reg(adap, A_SG_DATA_INTR);
2817
2818 if (unlikely(!map)) /* shared interrupt, most likely */
2819 return IRQ_NONE;
2820
2821 spin_lock(&q0->lock);
2822
2823 if (unlikely(map & F_ERRINTR))
2824 t3_slow_intr_handler(adap);
2825
2826 if (likely(map & 1))
2827 napi_schedule(&qs0->napi);
2828
2829 if (map & 2)
2830 napi_schedule(&adap->sge.qs[1].napi);
2831
2832 spin_unlock(&q0->lock);
2833 return IRQ_HANDLED;
2834 }
2835
2836 /**
2837 * t3_intr_handler - select the top-level interrupt handler
2838 * @adap: the adapter
2839 * @polling: whether using NAPI to service response queues
2840 *
2841 * Selects the top-level interrupt handler based on the type of interrupts
2842 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2843 * response queues.
2844 */
2845 irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2846 {
2847 if (adap->flags & USING_MSIX)
2848 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2849 if (adap->flags & USING_MSI)
2850 return polling ? t3_intr_msi_napi : t3_intr_msi;
2851 if (adap->params.rev > 0)
2852 return polling ? t3b_intr_napi : t3b_intr;
2853 return t3_intr;
2854 }
2855
2856 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2857 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2858 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2859 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2860 F_HIRCQPARITYERROR)
2861 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2862 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2863 F_RSPQDISABLED)
2864
2865 /**
2866 * t3_sge_err_intr_handler - SGE async event interrupt handler
2867 * @adapter: the adapter
2868 *
2869 * Interrupt handler for SGE asynchronous (non-data) events.
2870 */
2871 void t3_sge_err_intr_handler(struct adapter *adapter)
2872 {
2873 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE) &
2874 ~F_FLEMPTY;
2875
2876 if (status & SGE_PARERR)
2877 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2878 status & SGE_PARERR);
2879 if (status & SGE_FRAMINGERR)
2880 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2881 status & SGE_FRAMINGERR);
2882
2883 if (status & F_RSPQCREDITOVERFOW)
2884 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2885
2886 if (status & F_RSPQDISABLED) {
2887 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2888
2889 CH_ALERT(adapter,
2890 "packet delivered to disabled response queue "
2891 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2892 }
2893
2894 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2895 queue_work(cxgb3_wq, &adapter->db_drop_task);
2896
2897 if (status & (F_HIPRIORITYDBFULL | F_LOPRIORITYDBFULL))
2898 queue_work(cxgb3_wq, &adapter->db_full_task);
2899
2900 if (status & (F_HIPRIORITYDBEMPTY | F_LOPRIORITYDBEMPTY))
2901 queue_work(cxgb3_wq, &adapter->db_empty_task);
2902
2903 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2904 if (status & SGE_FATALERR)
2905 t3_fatal_err(adapter);
2906 }
2907
2908 /**
2909 * sge_timer_tx - perform periodic maintenance of an SGE qset
2910 * @t: a timer list containing the SGE queue set to maintain
2911 *
2912 * Runs periodically from a timer to perform maintenance of an SGE queue
2913 * set. It performs a single task:
2914 *
2915 * Cleans up any completed Tx descriptors that may still be pending.
2916 * Normal descriptor cleanup happens when new packets are added to a Tx
2917 * queue so this timer is relatively infrequent and does any cleanup only
2918 * if the Tx queue has not seen any new packets in a while. We make a
2919 * best effort attempt to reclaim descriptors, in that we don't wait
2920 * around if we cannot get a queue's lock (which most likely is because
2921 * someone else is queueing new packets and so will also handle the clean
2922 * up). Since control queues use immediate data exclusively we don't
2923 * bother cleaning them up here.
2924 *
2925 */
2926 static void sge_timer_tx(struct timer_list *t)
2927 {
2928 struct sge_qset *qs = from_timer(qs, t, tx_reclaim_timer);
2929 struct port_info *pi = netdev_priv(qs->netdev);
2930 struct adapter *adap = pi->adapter;
2931 unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0};
2932 unsigned long next_period;
2933
2934 if (__netif_tx_trylock(qs->tx_q)) {
2935 tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH],
2936 TX_RECLAIM_TIMER_CHUNK);
2937 __netif_tx_unlock(qs->tx_q);
2938 }
2939
2940 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2941 tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD],
2942 TX_RECLAIM_TIMER_CHUNK);
2943 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2944 }
2945
2946 next_period = TX_RECLAIM_PERIOD >>
2947 (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) /
2948 TX_RECLAIM_TIMER_CHUNK);
2949 mod_timer(&qs->tx_reclaim_timer, jiffies + next_period);
2950 }
2951
2952 /**
2953 * sge_timer_rx - perform periodic maintenance of an SGE qset
2954 * @t: the timer list containing the SGE queue set to maintain
2955 *
2956 * a) Replenishes Rx queues that have run out due to memory shortage.
2957 * Normally new Rx buffers are added when existing ones are consumed but
2958 * when out of memory a queue can become empty. We try to add only a few
2959 * buffers here; the queue will be replenished fully as these new buffers
2960 * are used up, once the memory shortage has subsided.
2961 *
2962 * b) Return coalesced response queue credits in case a response queue is
2963 * starved.
2964 *
2965 */
2966 static void sge_timer_rx(struct timer_list *t)
2967 {
2968 spinlock_t *lock;
2969 struct sge_qset *qs = from_timer(qs, t, rx_reclaim_timer);
2970 struct port_info *pi = netdev_priv(qs->netdev);
2971 struct adapter *adap = pi->adapter;
2972 u32 status;
2973
2974 lock = adap->params.rev > 0 ?
2975 &qs->rspq.lock : &adap->sge.qs[0].rspq.lock;
2976
2977 if (!spin_trylock_irq(lock))
2978 goto out;
2979
2980 if (napi_is_scheduled(&qs->napi))
2981 goto unlock;
2982
2983 if (adap->params.rev < 4) {
2984 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2985
2986 if (status & (1 << qs->rspq.cntxt_id)) {
2987 qs->rspq.starved++;
2988 if (qs->rspq.credits) {
2989 qs->rspq.credits--;
2990 refill_rspq(adap, &qs->rspq, 1);
2991 qs->rspq.restarted++;
2992 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2993 1 << qs->rspq.cntxt_id);
2994 }
2995 }
2996 }
2997
2998 if (qs->fl[0].credits < qs->fl[0].size)
2999 __refill_fl(adap, &qs->fl[0]);
3000 if (qs->fl[1].credits < qs->fl[1].size)
3001 __refill_fl(adap, &qs->fl[1]);
3002
3003 unlock:
3004 spin_unlock_irq(lock);
3005 out:
3006 mod_timer(&qs->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
3007 }
3008
3009 /**
3010 * t3_update_qset_coalesce - update coalescing settings for a queue set
3011 * @qs: the SGE queue set
3012 * @p: new queue set parameters
3013 *
3014 * Update the coalescing settings for an SGE queue set. Nothing is done
3015 * if the queue set is not initialized yet.
3016 */
3017 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
3018 {
3019 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
3020 qs->rspq.polling = p->polling;
3021 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
3022 }
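/*
 * The factor of 10 above presumably converts microseconds into the
 * 0.1us units the SGE holdoff timer uses (the same units as
 * NOMEM_INTR_DELAY); the max() merely keeps the value non-zero.
 */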
3023
3024 /**
3025 * t3_sge_alloc_qset - initialize an SGE queue set
3026 * @adapter: the adapter
3027 * @id: the queue set id
3028 * @nports: how many Ethernet ports will be using this queue set
3029 * @irq_vec_idx: the IRQ vector index for response queue interrupts
3030 * @p: configuration parameters for this queue set
3031 * @ntxq: number of Tx queues for the queue set
3032 * @dev: net device associated with this queue set
3033 * @netdevq: net device TX queue associated with this queue set
3034 *
3035 * Allocate resources and initialize an SGE queue set. A queue set
3036 * comprises a response queue, two Rx free-buffer queues, and up to 3
3037 * Tx queues. The Tx queues are assigned roles in the order Ethernet
3038 * queue, offload queue, and control queue.
3039 */
3040 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
3041 int irq_vec_idx, const struct qset_params *p,
3042 int ntxq, struct net_device *dev,
3043 struct netdev_queue *netdevq)
3044 {
3045 int i, avail, ret = -ENOMEM;
3046 struct sge_qset *q = &adapter->sge.qs[id];
3047
3048 init_qset_cntxt(q, id);
3049 timer_setup(&q->tx_reclaim_timer, sge_timer_tx, 0);
3050 timer_setup(&q->rx_reclaim_timer, sge_timer_rx, 0);
3051
3052 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
3053 sizeof(struct rx_desc),
3054 sizeof(struct rx_sw_desc),
3055 &q->fl[0].phys_addr, &q->fl[0].sdesc);
3056 if (!q->fl[0].desc)
3057 goto err;
3058
3059 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
3060 sizeof(struct rx_desc),
3061 sizeof(struct rx_sw_desc),
3062 &q->fl[1].phys_addr, &q->fl[1].sdesc);
3063 if (!q->fl[1].desc)
3064 goto err;
3065
3066 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
3067 sizeof(struct rsp_desc), 0,
3068 &q->rspq.phys_addr, NULL);
3069 if (!q->rspq.desc)
3070 goto err;
3071
3072 for (i = 0; i < ntxq; ++i) {
3073 /*
3074 * The control queue always uses immediate data so does not
3075 * need to keep track of any sk_buffs.
3076 */
3077 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
3078
3079 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
3080 sizeof(struct tx_desc), sz,
3081 &q->txq[i].phys_addr,
3082 &q->txq[i].sdesc);
3083 if (!q->txq[i].desc)
3084 goto err;
3085
3086 q->txq[i].gen = 1;
3087 q->txq[i].size = p->txq_size[i];
3088 spin_lock_init(&q->txq[i].lock);
3089 skb_queue_head_init(&q->txq[i].sendq);
3090 }
3091
3092 INIT_WORK(&q->txq[TXQ_OFLD].qresume_task, restart_offloadq);
3093 INIT_WORK(&q->txq[TXQ_CTRL].qresume_task, restart_ctrlq);
3094
3095 q->fl[0].gen = q->fl[1].gen = 1;
3096 q->fl[0].size = p->fl_size;
3097 q->fl[1].size = p->jumbo_size;
3098
3099 q->rspq.gen = 1;
3100 q->rspq.size = p->rspq_size;
3101 spin_lock_init(&q->rspq.lock);
3102 skb_queue_head_init(&q->rspq.rx_queue);
3103
3104 q->txq[TXQ_ETH].stop_thres = nports *
3105 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
3106
3107 #if FL0_PG_CHUNK_SIZE > 0
3108 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
3109 #else
3110 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
3111 #endif
3112 #if FL1_PG_CHUNK_SIZE > 0
3113 q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
3114 #else
3115 q->fl[1].buf_size = is_offload(adapter) ?
3116 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
3117 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
3118 #endif
3119
3120 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
3121 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
3122 q->fl[0].order = FL0_PG_ORDER;
3123 q->fl[1].order = FL1_PG_ORDER;
3124 q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
3125 q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
3126
3127 spin_lock_irq(&adapter->sge.reg_lock);
3128
3129 /* FL threshold comparison uses < */
3130 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
3131 q->rspq.phys_addr, q->rspq.size,
3132 q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
3133 if (ret)
3134 goto err_unlock;
3135
3136 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
3137 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
3138 q->fl[i].phys_addr, q->fl[i].size,
3139 q->fl[i].buf_size - SGE_PG_RSVD,
3140 p->cong_thres, 1, 0);
3141 if (ret)
3142 goto err_unlock;
3143 }
3144
3145 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
3146 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
3147 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
3148 1, 0);
3149 if (ret)
3150 goto err_unlock;
3151
3152 if (ntxq > 1) {
3153 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
3154 USE_GTS, SGE_CNTXT_OFLD, id,
3155 q->txq[TXQ_OFLD].phys_addr,
3156 q->txq[TXQ_OFLD].size, 0, 1, 0);
3157 if (ret)
3158 goto err_unlock;
3159 }
3160
3161 if (ntxq > 2) {
3162 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
3163 SGE_CNTXT_CTRL, id,
3164 q->txq[TXQ_CTRL].phys_addr,
3165 q->txq[TXQ_CTRL].size,
3166 q->txq[TXQ_CTRL].token, 1, 0);
3167 if (ret)
3168 goto err_unlock;
3169 }
3170
3171 spin_unlock_irq(&adapter->sge.reg_lock);
3172
3173 q->adap = adapter;
3174 q->netdev = dev;
3175 q->tx_q = netdevq;
3176 t3_update_qset_coalesce(q, p);
3177
3178 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
3179 GFP_KERNEL | __GFP_COMP);
3180 if (!avail) {
3181 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
3182 ret = -ENOMEM;
3183 goto err;
3184 }
3185 if (avail < q->fl[0].size)
3186 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
3187 avail);
3188
3189 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
3190 GFP_KERNEL | __GFP_COMP);
3191 if (avail < q->fl[1].size)
3192 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
3193 avail);
3194 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
3195
3196 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
3197 V_NEWTIMER(q->rspq.holdoff_tmr));
3198
3199 return 0;
3200
3201 err_unlock:
3202 spin_unlock_irq(&adapter->sge.reg_lock);
3203 err:
3204 t3_free_qset(adapter, q);
3205 return ret;
3206 }
3207
3208 /**
3209 * t3_start_sge_timers - start SGE timer call backs
3210 * @adap: the adapter
3211 *
3212 * Starts each SGE queue set's timer call back
3213 */
3214 void t3_start_sge_timers(struct adapter *adap)
3215 {
3216 int i;
3217
3218 for (i = 0; i < SGE_QSETS; ++i) {
3219 struct sge_qset *q = &adap->sge.qs[i];
3220
3221 if (q->tx_reclaim_timer.function)
3222 mod_timer(&q->tx_reclaim_timer,
3223 jiffies + TX_RECLAIM_PERIOD);
3224
3225 if (q->rx_reclaim_timer.function)
3226 mod_timer(&q->rx_reclaim_timer,
3227 jiffies + RX_RECLAIM_PERIOD);
3228 }
3229 }
3230
3231 /**
3232 * t3_stop_sge_timers - stop SGE timer call backs
3233 * @adap: the adapter
3234 *
3235 * Stops each SGE queue set's timer call back
3236 */
3237 void t3_stop_sge_timers(struct adapter *adap)
3238 {
3239 int i;
3240
3241 for (i = 0; i < SGE_QSETS; ++i) {
3242 struct sge_qset *q = &adap->sge.qs[i];
3243
3244 if (q->tx_reclaim_timer.function)
3245 del_timer_sync(&q->tx_reclaim_timer);
3246 if (q->rx_reclaim_timer.function)
3247 del_timer_sync(&q->rx_reclaim_timer);
3248 }
3249 }
3250
3251 /**
3252 * t3_free_sge_resources - free SGE resources
3253 * @adap: the adapter
3254 *
3255 * Frees resources used by the SGE queue sets.
3256 */
3257 void t3_free_sge_resources(struct adapter *adap)
3258 {
3259 int i;
3260
3261 for (i = 0; i < SGE_QSETS; ++i)
3262 t3_free_qset(adap, &adap->sge.qs[i]);
3263 }
3264
3265 /**
3266 * t3_sge_start - enable SGE
3267 * @adap: the adapter
3268 *
3269 * Enables the SGE for DMAs. This is the last step in starting packet
3270 * transfers.
3271 */
3272 void t3_sge_start(struct adapter *adap)
3273 {
3274 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3275 }
3276
3277 /**
3278 * t3_sge_stop_dma - Disable SGE DMA engine operation
3279 * @adap: the adapter
3280 *
3281 * Can be invoked from interrupt context e.g. error handler.
3282 *
3283 * Note that this function cannot cancel the pending queue-restart work
3284 * items, as it may not sleep when called from interrupt context; the
3285 * work items are harmless anyway because the doorbells are disabled. The
3286 * driver will call t3_sge_stop() later from process context, at which
3287 * time any work items still pending are cancelled.
3288 */
3289 void t3_sge_stop_dma(struct adapter *adap)
3290 {
3291 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3292 }
3293
3294 /**
3295 * t3_sge_stop - disable SGE operation completely
3296 * @adap: the adapter
3297 *
3298 * Called from process context. Disables the DMA engine and any
3299 * pending queue restart works.
3300 */
3301 void t3_sge_stop(struct adapter *adap)
3302 {
3303 int i;
3304
3305 t3_sge_stop_dma(adap);
3306
3307 /* workqueues aren't initialized otherwise */
3308 if (!(adap->flags & FULL_INIT_DONE))
3309 return;
3310 for (i = 0; i < SGE_QSETS; ++i) {
3311 struct sge_qset *qs = &adap->sge.qs[i];
3312
3313 cancel_work_sync(&qs->txq[TXQ_OFLD].qresume_task);
3314 cancel_work_sync(&qs->txq[TXQ_CTRL].qresume_task);
3315 }
3316 }
3317
3318 /**
3319 * t3_sge_init - initialize SGE
3320 * @adap: the adapter
3321 * @p: the SGE parameters
3322 *
3323 * Performs SGE initialization needed every time after a chip reset.
3324 * We do not initialize any of the queue sets here, instead the driver
3325 * top-level must request those individually. We also do not enable DMA
3326 * here, that should be done after the queues have been set up.
3327 */
3328 void t3_sge_init(struct adapter *adap, struct sge_params *p)
3329 {
3330 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3331
3332 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3333 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3334 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3335 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3336 #if SGE_NUM_GENBITS == 1
3337 ctrl |= F_EGRGENCTRL;
3338 #endif
3339 if (adap->params.rev > 0) {
3340 if (!(adap->flags & (USING_MSIX | USING_MSI)))
3341 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3342 }
3343 t3_write_reg(adap, A_SG_CONTROL, ctrl);
3344 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3345 V_LORCQDRBTHRSH(512));
3346 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3347 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3348 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3349 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3350 adap->params.rev < T3_REV_C ? 1000 : 500);
3351 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3352 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3353 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3354 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3355 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3356 }
3357
3358 /**
3359 * t3_sge_prep - one-time SGE initialization
3360 * @adap: the associated adapter
3361 * @p: SGE parameters
3362 *
3363 * Performs one-time initialization of SGE SW state. Includes determining
3364 * defaults for the assorted SGE parameters, which admins can change until
3365 * they are used to initialize the SGE.
3366 */
3367 void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3368 {
3369 int i;
3370
3371 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3372 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3373
3374 for (i = 0; i < SGE_QSETS; ++i) {
3375 struct qset_params *q = p->qset + i;
3376
3377 q->polling = adap->params.rev > 0;
3378 q->coalesce_usecs = 5;
3379 q->rspq_size = 1024;
3380 q->fl_size = 1024;
3381 q->jumbo_size = 512;
3382 q->txq_size[TXQ_ETH] = 1024;
3383 q->txq_size[TXQ_OFLD] = 1024;
3384 q->txq_size[TXQ_CTRL] = 256;
3385 q->cong_thres = 0;
3386 }
3387
3388 spin_lock_init(&adap->sge.reg_lock);
3389 }
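/*
 * Typical bring-up order, pieced together from the comments above (a
 * rough sketch rather than the driver's literal probe path):
 *
 *	t3_sge_prep(adap, &adap->params.sge);	// one-time SW defaults
 *	t3_sge_init(adap, &adap->params.sge);	// after each chip reset
 *	t3_sge_alloc_qset(adap, id, ...);	// once per queue set
 *	t3_sge_start(adap);			// enable DMA last
 *	t3_start_sge_timers(adap);		// start maintenance timers
 */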
3390