// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

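/* Pages are posted with a large reference-count bias (see
 * gve_alloc_page_dqo()) instead of taking a page reference for every
 * posted buffer.  gve_dec_pagecnt_bias() hands one of those biased
 * references to each SKB frag, so the difference between the current
 * page count and the remaining bias is the number of references still
 * held by SKBs up the stack.
 */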
static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
		      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

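/* Buffer states are linked by s16 indices into rx->dqo.buf_states rather
 * than by pointers: -1 terminates a list, and a state whose ->next field
 * points back at its own index is currently allocated.
 */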
static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

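/* Find a buffer state whose page can be reposted to the device: first try
 * the recycled list (known safe to reuse), then do a bounded scan of the
 * used list for pages whose SKB references have all been dropped.
 */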
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* If there are no free buf states, discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

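/* Allocate and DMA-map a fresh page for a buffer state.  The page's
 * reference count is raised to INT_MAX up front and the same value is
 * recorded in pagecnt_bias, so references can later be handed to SKBs by
 * decrementing the bias instead of touching the atomic page refcount for
 * every buffer.
 */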
static int gve_alloc_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	int err;

	err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page,
			     &buf_state->addr, DMA_FROM_DEVICE, GFP_KERNEL);
	if (err)
		return err;

	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	size_t size;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	gve_rx_remove_from_block(priv, idx);

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs);
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t size;
	int i;

	const u32 buffer_queue_slots =
		priv->options_dqo_rda.rx_buff_ring_entries;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	rx->dqo.num_buf_states = min_t(s16, S16_MAX, buffer_queue_slots * 4);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Set up linked list of buffer IDs */
	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
		rx->dqo.buf_states[i].next = i + 1;

	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_add_to_block(priv, idx);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, idx);
	return -ENOMEM;
}

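/* Tell the device about newly posted buffers by writing the buffer queue
 * tail index to this queue's doorbell register.
 */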
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, i);

	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring_dqo(priv, i);
}

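/* Post as many buffers as the buffer queue and completion queue have room
 * for, preferring recycled pages and allocating fresh ones only when none
 * can be reused.  The doorbell is rung once every GVE_RX_BUF_THRESH_DQO
 * buffers rather than once per buffer.
 */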
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(priv, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

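/* Decide whether this buffer's page can be handed straight back to the
 * device at the next offset, or whether it must sit on the used list
 * until the SKBs referencing it have been freed.
 */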
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const int data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer-sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
}

static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

/* Chains multiple SKBs for a single RX packet.
 * Returns 0 if the buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		skb_shinfo(rx->ctx.skb_tail)->frag_list = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	return 0;
}

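/* Process a single RX completion descriptor.  The payload is either
 * appended to the SKB already being built for this packet, copied into a
 * small freshly allocated SKB when the whole packet fits under
 * rx_copybreak, or attached zero-copy as a page frag of a new napi SKB.
 */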
/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool eop = compl_desc->end_of_packet != 0;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;

	/* The page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv) != 0)) {
			goto error;
		}

		gve_try_recycle_buf(priv, rx, buf_state);
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len, 0, NULL);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if the skb is completed successfully, a negative errno otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size, otherwise the TCP stack will complain
	 * that packets are larger than the MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

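/* NAPI poll loop for a DQO RX queue.  A completion descriptor is new only
 * while its generation bit differs from cur_gen_bit, so the loop stops at
 * the first descriptor whose generation matches cur_gen_bit or once the
 * budget is exhausted, then refills the buffer queue before returning.
 */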
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}