// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

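/* Number of references on the page that are held outside the driver, i.e. by
 * SKBs that are still in flight, measured against the driver's pagecnt bias.
 */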
static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

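/* Drop the driver's remaining bias references and unmap/free the page. */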
static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
		      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

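/* Pop a buffer state off the free list; returns NULL if the list is empty. */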
static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

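/* An allocated buffer state has its next index pointing at itself. */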
static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

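/* Push a buffer state back onto the free list. */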
static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

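/* Remove and return the buffer state at the head of @list, or NULL if the
 * list is empty.
 */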
static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

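/* Append @buf_state to the tail of @list. */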
static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

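/* Find a buffer state whose page can be posted again: prefer the recycled
 * list, then scan a few used entries, and if the free list is empty, discard
 * a used entry so its state can be paired with a fresh page.
 */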
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* If there are no free buf states, discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

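/* Allocate and DMA-map a fresh page for @buf_state, and take a large page
 * reference bias so the stack releasing its references never frees the page
 * while the driver still owns it.
 */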
static int gve_alloc_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	int err;

	err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page,
			     &buf_state->addr, DMA_FROM_DEVICE, GFP_KERNEL);
	if (err)
		return err;

	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

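/* Remove RX ring @idx from its notify block and free its posted pages,
 * descriptor rings, buffer state array, and queue resources.
 */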
static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	size_t size;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	gve_rx_remove_from_block(priv, idx);

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs);
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

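/* Allocate the buffer state array, completion and buffer descriptor rings,
 * and queue resources for RX ring @idx, cleaning up on failure.
 */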
static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t size;
	int i;

	const u32 buffer_queue_slots =
		priv->options_dqo_rda.rx_buff_ring_entries;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	rx->dqo.num_buf_states = min_t(s16, S16_MAX, buffer_queue_slots * 4);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Set up linked list of buffer IDs */
	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
		rx->dqo.buf_states[i].next = i + 1;

	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_add_to_block(priv, idx);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, idx);
	return -ENOMEM;
}

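/* Write the buffer queue tail to the ring's doorbell so the device sees the
 * newly posted buffers.
 */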
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

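/* Allocate every configured RX ring; on failure, free the rings that were
 * already allocated.
 */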
int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, i);

	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring_dqo(priv, i);
}

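/* Fill the buffer queue with as many buffers as the buffer and completion
 * queues can accept, preferring recycled pages over new allocations. The
 * doorbell is rung whenever the tail crosses a GVE_RX_BUF_THRESH_DQO
 * boundary.
 */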
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(priv, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const int data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer-sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
}

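/* Set CHECKSUM_UNNECESSARY when the device parsed the headers and reported
 * no checksum errors for a recognized L4 type; otherwise leave the skb at
 * CHECKSUM_NONE.
 */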
static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		skb_shinfo(rx->ctx.skb_tail)->frag_list = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool eop = compl_desc->end_of_packet != 0;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;

	/* Page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}

		gve_try_recycle_buf(priv, rx, buf_state);
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len, 0, NULL);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

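/* For an RSC (coalesced) packet, fill in the GSO type and segment size so
 * the stack can deal with an skb larger than MTU.
 */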
static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, a negative error otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

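/* NAPI poll for a DQO RX ring: process up to @budget completed packets,
 * repost buffers, and update the ring's packet and byte counters.
 */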
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}