1 /*
2 * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/ip.h>
34 #include <linux/ipv6.h>
35 #include <linux/tcp.h>
36 #include <linux/bitmap.h>
37 #include <linux/filter.h>
38 #include <net/ip6_checksum.h>
39 #include <net/page_pool.h>
40 #include <net/inet_ecn.h>
41 #include <net/gro.h>
42 #include <net/udp.h>
43 #include <net/tcp.h>
44 #include <net/xdp_sock_drv.h>
45 #include "en.h"
46 #include "en/txrx.h"
47 #include "en_tc.h"
48 #include "eswitch.h"
49 #include "en_rep.h"
50 #include "en/rep/tc.h"
51 #include "ipoib/ipoib.h"
52 #include "en_accel/ipsec.h"
53 #include "en_accel/macsec.h"
54 #include "en_accel/ipsec_rxtx.h"
55 #include "en_accel/ktls_txrx.h"
56 #include "en/xdp.h"
57 #include "en/xsk/rx.h"
58 #include "en/health.h"
59 #include "en/params.h"
60 #include "devlink.h"
61 #include "en/devlink.h"
62
63 static struct sk_buff *
64 mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
65 u16 cqe_bcnt, u32 head_offset, u32 page_idx);
66 static struct sk_buff *
67 mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
68 u16 cqe_bcnt, u32 head_offset, u32 page_idx);
69 static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
70 static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
71 static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
72
73 const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = {
74 .handle_rx_cqe = mlx5e_handle_rx_cqe,
75 .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
76 .handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo,
77 };
78
79 static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
80 {
81 return config->rx_filter == HWTSTAMP_FILTER_ALL;
82 }
83
84 static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq,
85 u32 cqcc, void *data)
86 {
87 u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);
88
89 memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64));
90 }
91
92 static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
93 struct mlx5_cqwq *wq,
94 u32 cqcc)
95 {
96 struct mlx5e_cq_decomp *cqd = &rq->cqd;
97 struct mlx5_cqe64 *title = &cqd->title;
98
99 mlx5e_read_cqe_slot(wq, cqcc, title);
100 cqd->left = be32_to_cpu(title->byte_cnt);
101 cqd->wqe_counter = be16_to_cpu(title->wqe_counter);
102 rq->stats->cqe_compress_blks++;
103 }
104
105 static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq,
106 struct mlx5e_cq_decomp *cqd,
107 u32 cqcc)
108 {
109 mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr);
110 cqd->mini_arr_idx = 0;
111 }
112
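/* Fix up the op_own (ownership) bits of the CQE slots consumed while a
 * compressed session was expanded in software, keeping them consistent with
 * the consumer wrap count; handles wrap-around of the CQ ring.
 */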
113 static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n)
114 {
115 u32 cqcc = wq->cc;
116 u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1;
117 u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);
118 u32 wq_sz = mlx5_cqwq_get_size(wq);
119 u32 ci_top = min_t(u32, wq_sz, ci + n);
120
121 for (; ci < ci_top; ci++, n--) {
122 struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
123
124 cqe->op_own = op_own;
125 }
126
127 if (unlikely(ci == wq_sz)) {
128 op_own = !op_own;
129 for (ci = 0; ci < n; ci++) {
130 struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
131
132 cqe->op_own = op_own;
133 }
134 }
135 }
136
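/* Expand one mini CQE from the decompression array into the cached title CQE:
 * copy byte count and checksum, fix the ownership bit, and derive the WQE
 * counter either from the HW stride index (when supported) or by accumulating
 * consumed strides / advancing the cyclic WQE counter.
 */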
137 static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
138 struct mlx5_cqwq *wq,
139 u32 cqcc)
140 {
141 struct mlx5e_cq_decomp *cqd = &rq->cqd;
142 struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx];
143 struct mlx5_cqe64 *title = &cqd->title;
144
145 title->byte_cnt = mini_cqe->byte_cnt;
146 title->check_sum = mini_cqe->checksum;
147 title->op_own &= 0xf0;
148 title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz);
149
150 /* state bit set implies linked-list striding RQ wq type and
151 * HW stride index capability supported
152 */
153 if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) {
154 title->wqe_counter = mini_cqe->stridx;
155 return;
156 }
157
158 /* HW stride index capability not supported */
159 title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
160 if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
161 cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title);
162 else
163 cqd->wqe_counter =
164 mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1);
165 }
166
167 static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
168 struct mlx5_cqwq *wq,
169 u32 cqcc)
170 {
171 struct mlx5e_cq_decomp *cqd = &rq->cqd;
172
173 mlx5e_decompress_cqe(rq, wq, cqcc);
174 cqd->title.rss_hash_type = 0;
175 cqd->title.rss_hash_result = 0;
176 }
177
178 static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
179 struct mlx5_cqwq *wq,
180 int update_owner_only,
181 int budget_rem)
182 {
183 struct mlx5e_cq_decomp *cqd = &rq->cqd;
184 u32 cqcc = wq->cc + update_owner_only;
185 u32 cqe_count;
186 u32 i;
187
188 cqe_count = min_t(u32, cqd->left, budget_rem);
189
190 for (i = update_owner_only; i < cqe_count;
191 i++, cqd->mini_arr_idx++, cqcc++) {
192 if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
193 mlx5e_read_mini_arr_slot(wq, cqd, cqcc);
194
195 mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
196 INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
197 mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
198 rq, &cqd->title);
199 }
200 mlx5e_cqes_update_owner(wq, cqcc - wq->cc);
201 wq->cc = cqcc;
202 cqd->left -= cqe_count;
203 rq->stats->cqe_compress_pkts += cqe_count;
204
205 return cqe_count;
206 }
207
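/* Start decompression of a compressed CQE session: read the title CQE and the
 * first mini CQE array slot, handle the first expanded CQE, then let
 * mlx5e_decompress_cqes_cont() process the remaining mini CQEs within the
 * NAPI budget.
 */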
208 static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
209 struct mlx5_cqwq *wq,
210 int budget_rem)
211 {
212 struct mlx5e_cq_decomp *cqd = &rq->cqd;
213 u32 cc = wq->cc;
214
215 mlx5e_read_title_slot(rq, wq, cc);
216 mlx5e_read_mini_arr_slot(wq, cqd, cc + 1);
217 mlx5e_decompress_cqe(rq, wq, cc);
218 INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
219 mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
220 rq, &cqd->title);
221 cqd->mini_arr_idx++;
222
223 return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
224 }
225
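/* Try to stash @page in the per-RQ page cache ring for later reuse. Fails if
 * the ring is full or the page cannot be reused (pfmemalloc page or a page
 * from a remote NUMA node).
 */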
226 static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page)
227 {
228 struct mlx5e_page_cache *cache = &rq->page_cache;
229 u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
230 struct mlx5e_rq_stats *stats = rq->stats;
231
232 if (tail_next == cache->head) {
233 stats->cache_full++;
234 return false;
235 }
236
237 if (!dev_page_is_reusable(page)) {
238 stats->cache_waive++;
239 return false;
240 }
241
242 cache->page_cache[cache->tail] = page;
243 cache->tail = tail_next;
244 return true;
245 }
246
247 static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
248 {
249 struct mlx5e_page_cache *cache = &rq->page_cache;
250 struct mlx5e_rq_stats *stats = rq->stats;
251 dma_addr_t addr;
252
253 if (unlikely(cache->head == cache->tail)) {
254 stats->cache_empty++;
255 return false;
256 }
257
258 if (page_ref_count(cache->page_cache[cache->head]) != 1) {
259 stats->cache_busy++;
260 return false;
261 }
262
263 au->page = cache->page_cache[cache->head];
264 cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
265 stats->cache_reuse++;
266
267 addr = page_pool_get_dma_addr(au->page);
268 /* Non-XSK always uses PAGE_SIZE. */
269 dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir);
270 return true;
271 }
272
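/* Get a page for an RX alloc unit: try the local RQ page cache first, then
 * fall back to the page pool and DMA-map the freshly allocated page.
 */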
273 static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
274 {
275 dma_addr_t addr;
276
277 if (mlx5e_rx_cache_get(rq, au))
278 return 0;
279
280 au->page = page_pool_dev_alloc_pages(rq->page_pool);
281 if (unlikely(!au->page))
282 return -ENOMEM;
283
284 /* Non-XSK always uses PAGE_SIZE. */
285 addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir);
286 if (unlikely(dma_mapping_error(rq->pdev, addr))) {
287 page_pool_recycle_direct(rq->page_pool, au->page);
288 au->page = NULL;
289 return -ENOMEM;
290 }
291 page_pool_set_dma_addr(au->page, addr);
292
293 return 0;
294 }
295
296 void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page)
297 {
298 dma_addr_t dma_addr = page_pool_get_dma_addr(page);
299
300 dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir,
301 DMA_ATTR_SKIP_CPU_SYNC);
302 page_pool_set_dma_addr(page, 0);
303 }
304
305 void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle)
306 {
307 if (likely(recycle)) {
308 if (mlx5e_rx_cache_put(rq, page))
309 return;
310
311 mlx5e_page_dma_unmap(rq, page);
312 page_pool_recycle_direct(rq->page_pool, page);
313 } else {
314 mlx5e_page_dma_unmap(rq, page);
315 page_pool_release_page(rq->page_pool, page);
316 put_page(page);
317 }
318 }
319
320 static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
321 struct mlx5e_wqe_frag_info *frag)
322 {
323 int err = 0;
324
325 if (!frag->offset)
326 /* On first frag (offset == 0), replenish page (alloc_unit actually).
327 * Other frags that point to the same alloc_unit (with a different
328 * offset) should just use the new one without replenishing again
329 * by themselves.
330 */
331 err = mlx5e_page_alloc_pool(rq, frag->au);
332
333 return err;
334 }
335
336 static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
337 struct mlx5e_wqe_frag_info *frag,
338 bool recycle)
339 {
340 if (frag->last_in_page)
341 mlx5e_page_release_dynamic(rq, frag->au->page, recycle);
342 }
343
344 static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
345 {
346 return &rq->wqe.frags[ix << rq->wqe.info.log_num_frags];
347 }
348
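/* Populate one cyclic RX WQE: make sure every fragment has a backing page
 * (fragments may share one alloc unit) and write the DMA addresses, applying
 * headroom only to the first fragment.
 */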
349 static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
350 u16 ix)
351 {
352 struct mlx5e_wqe_frag_info *frag = get_frag(rq, ix);
353 int err;
354 int i;
355
356 for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) {
357 dma_addr_t addr;
358 u16 headroom;
359
360 err = mlx5e_get_rx_frag(rq, frag);
361 if (unlikely(err))
362 goto free_frags;
363
364 headroom = i == 0 ? rq->buff.headroom : 0;
365 addr = page_pool_get_dma_addr(frag->au->page);
366 wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom);
367 }
368
369 return 0;
370
371 free_frags:
372 while (--i >= 0)
373 mlx5e_put_rx_frag(rq, --frag, true);
374
375 return err;
376 }
377
378 static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
379 struct mlx5e_wqe_frag_info *wi,
380 bool recycle)
381 {
382 int i;
383
384 if (rq->xsk_pool) {
385 /* The `recycle` parameter is ignored, and the page is always
386 * put into the Reuse Ring, because there is no way to return
387 * the page to the userspace when the interface goes down.
388 */
389 xsk_buff_free(wi->au->xsk);
390 return;
391 }
392
393 for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
394 mlx5e_put_rx_frag(rq, wi, recycle);
395 }
396
397 static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
398 {
399 struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
400
401 mlx5e_free_rx_wqe(rq, wi, false);
402 }
403
404 static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
405 {
406 struct mlx5_wq_cyc *wq = &rq->wqe.wq;
407 int i;
408
409 for (i = 0; i < wqe_bulk; i++) {
410 int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
411 struct mlx5e_rx_wqe_cyc *wqe;
412
413 wqe = mlx5_wq_cyc_get_wqe(wq, j);
414
415 if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, j)))
416 break;
417 }
418
419 return i;
420 }
421
422 static inline void
423 mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
424 union mlx5e_alloc_unit *au, u32 frag_offset, u32 len,
425 unsigned int truesize)
426 {
427 dma_addr_t addr = page_pool_get_dma_addr(au->page);
428
429 dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len,
430 rq->buff.map_dir);
431 page_ref_inc(au->page);
432 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
433 au->page, frag_offset, len, truesize);
434 }
435
436 static inline void
437 mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
438 struct page *page, dma_addr_t addr,
439 int offset_from, int dma_offset, u32 headlen)
440 {
441 const void *from = page_address(page) + offset_from;
442 /* Aligning len to sizeof(long) optimizes memcpy performance */
443 unsigned int len = ALIGN(headlen, sizeof(long));
444
445 dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len,
446 rq->buff.map_dir);
447 skb_copy_to_linear_data(skb, from, len);
448 }
449
450 static void
451 mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
452 {
453 union mlx5e_alloc_unit *alloc_units = wi->alloc_units;
454 bool no_xdp_xmit;
455 int i;
456
457 /* A common case for AF_XDP. */
458 if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe))
459 return;
460
461 no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
462
463 if (rq->xsk_pool) {
464 /* The `recycle` parameter is ignored, and the page is always
465 * put into the Reuse Ring, because there is no way to return
466 * the page to the userspace when the interface goes down.
467 */
468 for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
469 if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
470 xsk_buff_free(alloc_units[i].xsk);
471 } else {
472 for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
473 if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
474 mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle);
475 }
476 }
477
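/* Make @n multi-packet WQEs, whose UMR mappings have completed, available to
 * the device: push them onto the linked-list WQ and update the doorbell
 * record after a DMA write barrier.
 */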
478 static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
479 {
480 struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
481
482 do {
483 u16 next_wqe_index = mlx5_wq_ll_get_wqe_next_ix(wq, wq->head);
484
485 mlx5_wq_ll_push(wq, next_wqe_index);
486 } while (--n);
487
488 /* ensure wqes are visible to device before updating doorbell record */
489 dma_wmb();
490
491 mlx5_wq_ll_update_db_record(wq);
492 }
493
494 /* Return the length of the contiguous run of clear bits in the bitmap,
495 * starting at @first and capped at @len; the window may wrap around the
496 * end of the bitmap.
497 static int bitmap_find_window(unsigned long *bitmap, int len,
498 int bitmap_size, int first)
499 {
500 int next_one, count;
501
502 next_one = find_next_bit(bitmap, bitmap_size, first);
503 if (next_one == bitmap_size) {
504 if (bitmap_size - first >= len)
505 return len;
506 next_one = find_next_bit(bitmap, bitmap_size, 0);
507 count = next_one + bitmap_size - first;
508 } else {
509 count = next_one - first;
510 }
511
512 return min(len, count);
513 }
514
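/* Fill the control and UMR segments of a KLM UMR WQE used to map SHAMPO
 * header buffers: opcode, UMR mkey, translation offset and the number of
 * inline KLM entries (octowords).
 */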
515 static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
516 __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs)
517 {
518 memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms));
519 umr_wqe->ctrl.opmod_idx_opcode =
520 cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
521 MLX5_OPCODE_UMR);
522 umr_wqe->ctrl.umr_mkey = key;
523 umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT)
524 | MLX5E_KLM_UMR_DS_CNT(klm_len));
525 umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
526 umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
527 umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len);
528 umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
529 }
530
531 static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
532 struct mlx5e_icosq *sq,
533 u16 klm_entries, u16 index)
534 {
535 struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
536 u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
537 u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
538 struct page *page = shampo->last_page;
539 u64 addr = shampo->last_addr;
540 struct mlx5e_dma_info *dma_info;
541 struct mlx5e_umr_wqe *umr_wqe;
542 int headroom, i;
543
544 headroom = rq->buff.headroom;
545 new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
546 entries = ALIGN(klm_entries, MLX5_UMR_KLM_ALIGNMENT);
547 wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries);
548 pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
549 umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
550 build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
551
552 for (i = 0; i < entries; i++, index++) {
553 dma_info = &shampo->info[index];
554 if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
555 MLX5_UMR_KLM_ALIGNMENT))
556 goto update_klm;
557 header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
558 MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
559 if (!(header_offset & (PAGE_SIZE - 1))) {
560 union mlx5e_alloc_unit au;
561
562 err = mlx5e_page_alloc_pool(rq, &au);
563 if (unlikely(err))
564 goto err_unmap;
565 page = dma_info->page = au.page;
566 addr = dma_info->addr = page_pool_get_dma_addr(au.page);
567 } else {
568 dma_info->addr = addr + header_offset;
569 dma_info->page = page;
570 }
571
572 update_klm:
573 umr_wqe->inline_klms[i].bcount =
574 cpu_to_be32(MLX5E_RX_MAX_HEAD);
575 umr_wqe->inline_klms[i].key = cpu_to_be32(lkey);
576 umr_wqe->inline_klms[i].va =
577 cpu_to_be64(dma_info->addr + headroom);
578 }
579
580 sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
581 .wqe_type = MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
582 .num_wqebbs = wqe_bbs,
583 .shampo.len = new_entries,
584 };
585
586 shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
587 shampo->last_page = page;
588 shampo->last_addr = addr;
589 sq->pc += wqe_bbs;
590 sq->doorbell_cseg = &umr_wqe->ctrl;
591
592 return 0;
593
594 err_unmap:
595 while (--i >= 0) {
596 dma_info = &shampo->info[--index];
597 if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
598 dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
599 mlx5e_page_release_dynamic(rq, dma_info->page, true);
600 }
601 }
602 rq->stats->buff_alloc_err++;
603 return err;
604 }
605
606 static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
607 {
608 struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
609 u16 klm_entries, num_wqe, index, entries_before;
610 struct mlx5e_icosq *sq = rq->icosq;
611 int i, err, max_klm_entries, len;
612
613 max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
614 klm_entries = bitmap_find_window(shampo->bitmap,
615 shampo->hd_per_wqe,
616 shampo->hd_per_wq, shampo->pi);
617 if (!klm_entries)
618 return 0;
619
620 klm_entries += (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
621 index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_ALIGNMENT);
622 entries_before = shampo->hd_per_wq - index;
623
624 if (unlikely(entries_before < klm_entries))
625 num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) +
626 DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries);
627 else
628 num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries);
629
630 for (i = 0; i < num_wqe; i++) {
631 len = (klm_entries > max_klm_entries) ? max_klm_entries :
632 klm_entries;
633 if (unlikely(index + len > shampo->hd_per_wq))
634 len = shampo->hd_per_wq - index;
635 err = mlx5e_build_shampo_hd_umr(rq, sq, len, index);
636 if (unlikely(err))
637 return err;
638 index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1);
639 klm_entries -= len;
640 }
641
642 return 0;
643 }
644
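/* Allocate the pages backing one multi-packet WQE and post a UMR WQE on the
 * ICOSQ that maps them as inline MTTs. For SHAMPO RQs the header buffers are
 * replenished first. Ringing the doorbell is deferred via sq->doorbell_cseg.
 */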
645 static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
646 {
647 struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
648 union mlx5e_alloc_unit *au = &wi->alloc_units[0];
649 struct mlx5e_icosq *sq = rq->icosq;
650 struct mlx5_wq_cyc *wq = &sq->wq;
651 struct mlx5e_umr_wqe *umr_wqe;
652 u32 offset; /* 17-bit value with MTT. */
653 u16 pi;
654 int err;
655 int i;
656
657 if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
658 err = mlx5e_alloc_rx_hd_mpwqe(rq);
659 if (unlikely(err))
660 goto err;
661 }
662
663 pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs);
664 umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
665 memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
666
667 for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) {
668 dma_addr_t addr;
669
670 err = mlx5e_page_alloc_pool(rq, au);
671 if (unlikely(err))
672 goto err_unmap;
673 addr = page_pool_get_dma_addr(au->page);
674 umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
675 .ptag = cpu_to_be64(addr | MLX5_EN_WR),
676 };
677 }
678
679 bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
680 wi->consumed_strides = 0;
681
682 umr_wqe->ctrl.opmod_idx_opcode =
683 cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
684 MLX5_OPCODE_UMR);
685
686 offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
687 umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
688
689 sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
690 .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
691 .num_wqebbs = rq->mpwqe.umr_wqebbs,
692 .umr.rq = rq,
693 };
694
695 sq->pc += rq->mpwqe.umr_wqebbs;
696
697 sq->doorbell_cseg = &umr_wqe->ctrl;
698
699 return 0;
700
701 err_unmap:
702 while (--i >= 0) {
703 au--;
704 mlx5e_page_release_dynamic(rq, au->page, true);
705 }
706
707 err:
708 rq->stats->buff_alloc_err++;
709
710 return err;
711 }
712
713 /* Deallocate SHAMPO header buffer entries. close == true means the RQ is
714 * being torn down: walk the given range and free only the entries still
715 * marked as in use in the bitmap. close == false means the range covers
716 * entries the hardware has finished with, so the whole range is released.
717 * In both cases the bitmap bits of the range are cleared at the end.
718 */
719 void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close)
720 {
721 struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
722 int hd_per_wq = shampo->hd_per_wq;
723 struct page *deleted_page = NULL;
724 struct mlx5e_dma_info *hd_info;
725 int i, index = start;
726
727 for (i = 0; i < len; i++, index++) {
728 if (index == hd_per_wq)
729 index = 0;
730
731 if (close && !test_bit(index, shampo->bitmap))
732 continue;
733
734 hd_info = &shampo->info[index];
735 hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
736 if (hd_info->page != deleted_page) {
737 deleted_page = hd_info->page;
738 mlx5e_page_release_dynamic(rq, hd_info->page, false);
739 }
740 }
741
742 if (start + len > hd_per_wq) {
743 len -= hd_per_wq - start;
744 bitmap_clear(shampo->bitmap, start, hd_per_wq - start);
745 start = 0;
746 }
747
748 bitmap_clear(shampo->bitmap, start, len);
749 }
750
751 static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
752 {
753 struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
754 /* Don't recycle, this function is called on rq/netdev close */
755 mlx5e_free_rx_mpwqe(rq, wi, false);
756 }
757
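/* NAPI-context refill of the legacy (cyclic) RQ: allocate a bulk of WQEs
 * (regular, XSK batched, or XSK one-by-one when DMA sync is needed), push
 * them to the ring and update the doorbell record. Returns true if the
 * refill fell short of the requested bulk so the poll stays busy.
 */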
758 INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
759 {
760 struct mlx5_wq_cyc *wq = &rq->wqe.wq;
761 int wqe_bulk, count;
762 bool busy = false;
763 u16 head;
764
765 if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
766 return false;
767
768 if (mlx5_wq_cyc_missing(wq) < rq->wqe.info.wqe_bulk)
769 return false;
770
771 if (rq->page_pool)
772 page_pool_nid_changed(rq->page_pool, numa_mem_id());
773
774 wqe_bulk = mlx5_wq_cyc_missing(wq);
775 head = mlx5_wq_cyc_get_head(wq);
776
777 /* Don't allow any newly allocated WQEs to share the same page with old
778 * WQEs that aren't completed yet. Stop earlier.
779 */
780 wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask;
781
782 if (!rq->xsk_pool)
783 count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
784 else if (likely(!rq->xsk_pool->dma_need_sync))
785 count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk);
786 else
787 /* If dma_need_sync is true, it's more efficient to call
788 * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch,
789 * because the latter does the same check and returns only one
790 * frame.
791 */
792 count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk);
793
794 mlx5_wq_cyc_push_n(wq, count);
795 if (unlikely(count != wqe_bulk)) {
796 rq->stats->buff_alloc_err++;
797 busy = true;
798 }
799
800 /* ensure wqes are visible to device before updating doorbell record */
801 dma_wmb();
802
803 mlx5_wq_cyc_update_db_record(wq);
804
805 return busy;
806 }
807
808 void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)
809 {
810 u16 sqcc;
811
812 sqcc = sq->cc;
813
814 while (sqcc != sq->pc) {
815 struct mlx5e_icosq_wqe_info *wi;
816 u16 ci;
817
818 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
819 wi = &sq->db.wqe_info[ci];
820 sqcc += wi->num_wqebbs;
821 #ifdef CONFIG_MLX5_EN_TLS
822 switch (wi->wqe_type) {
823 case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
824 mlx5e_ktls_handle_ctx_completion(wi);
825 break;
826 case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
827 mlx5e_ktls_handle_get_psv_completion(wi, sq);
828 break;
829 }
830 #endif
831 }
832 sq->cc = sqcc;
833 }
834
835 static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr,
836 struct mlx5e_icosq *sq)
837 {
838 struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq);
839 struct mlx5e_shampo_hd *shampo;
840 /* assume 1:1 relationship between RQ and icosq */
841 struct mlx5e_rq *rq = &c->rq;
842 int end, from, len = umr.len;
843
844 shampo = rq->mpwqe.shampo;
845 end = shampo->hd_per_wq;
846 from = shampo->ci;
847 if (from + len > shampo->hd_per_wq) {
848 len -= end - from;
849 bitmap_set(shampo->bitmap, from, end - from);
850 from = 0;
851 }
852
853 bitmap_set(shampo->bitmap, from, len);
854 shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1);
855 }
856
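/* Poll completions of the internal control operations SQ (ICOSQ): UMR WQEs
 * for striding RQ and SHAMPO headers, NOPs and kTLS PSV operations. An error
 * CQE triggers the SQ recovery work. Returns the number of CQEs processed.
 */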
857 int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
858 {
859 struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
860 struct mlx5_cqe64 *cqe;
861 u16 sqcc;
862 int i;
863
864 if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
865 return 0;
866
867 cqe = mlx5_cqwq_get_cqe(&cq->wq);
868 if (likely(!cqe))
869 return 0;
870
871 /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
872 * otherwise a cq overrun may occur
873 */
874 sqcc = sq->cc;
875
876 i = 0;
877 do {
878 u16 wqe_counter;
879 bool last_wqe;
880
881 mlx5_cqwq_pop(&cq->wq);
882
883 wqe_counter = be16_to_cpu(cqe->wqe_counter);
884
885 do {
886 struct mlx5e_icosq_wqe_info *wi;
887 u16 ci;
888
889 last_wqe = (sqcc == wqe_counter);
890
891 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
892 wi = &sq->db.wqe_info[ci];
893 sqcc += wi->num_wqebbs;
894
895 if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
896 netdev_WARN_ONCE(cq->netdev,
897 "Bad OP in ICOSQ CQE: 0x%x\n",
898 get_cqe_opcode(cqe));
899 mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
900 (struct mlx5_err_cqe *)cqe);
901 mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
902 if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
903 queue_work(cq->priv->wq, &sq->recover_work);
904 break;
905 }
906
907 switch (wi->wqe_type) {
908 case MLX5E_ICOSQ_WQE_UMR_RX:
909 wi->umr.rq->mpwqe.umr_completed++;
910 break;
911 case MLX5E_ICOSQ_WQE_NOP:
912 break;
913 case MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR:
914 mlx5e_handle_shampo_hd_umr(wi->shampo, sq);
915 break;
916 #ifdef CONFIG_MLX5_EN_TLS
917 case MLX5E_ICOSQ_WQE_UMR_TLS:
918 break;
919 case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
920 mlx5e_ktls_handle_ctx_completion(wi);
921 break;
922 case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
923 mlx5e_ktls_handle_get_psv_completion(wi, sq);
924 break;
925 #endif
926 default:
927 netdev_WARN_ONCE(cq->netdev,
928 "Bad WQE type in ICOSQ WQE info: 0x%x\n",
929 wi->wqe_type);
930 }
931 } while (!last_wqe);
932 } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
933
934 sq->cc = sqcc;
935
936 mlx5_cqwq_update_db_record(&cq->wq);
937
938 return i;
939 }
940
941 INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
942 {
943 struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
944 u8 umr_completed = rq->mpwqe.umr_completed;
945 struct mlx5e_icosq *sq = rq->icosq;
946 int alloc_err = 0;
947 u8 missing, i;
948 u16 head;
949
950 if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
951 return false;
952
953 if (umr_completed) {
954 mlx5e_post_rx_mpwqe(rq, umr_completed);
955 rq->mpwqe.umr_in_progress -= umr_completed;
956 rq->mpwqe.umr_completed = 0;
957 }
958
959 missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress;
960
961 if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk))
962 rq->stats->congst_umr++;
963
964 if (likely(missing < rq->mpwqe.min_wqe_bulk))
965 return false;
966
967 if (rq->page_pool)
968 page_pool_nid_changed(rq->page_pool, numa_mem_id());
969
970 head = rq->mpwqe.actual_wq_head;
971 i = missing;
972 do {
973 alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) :
974 mlx5e_alloc_rx_mpwqe(rq, head);
975
976 if (unlikely(alloc_err))
977 break;
978 head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
979 } while (--i);
980
981 rq->mpwqe.umr_last_bulk = missing - i;
982 if (sq->doorbell_cseg) {
983 mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
984 sq->doorbell_cseg = NULL;
985 }
986
987 rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk;
988 rq->mpwqe.actual_wq_head = head;
989
990 /* If XSK Fill Ring doesn't have enough frames, report the error, so
991 * that one of the actions can be performed:
992 * 1. If need_wakeup is used, signal that the application has to kick
993 * the driver when it refills the Fill Ring.
994 * 2. Otherwise, busy poll by rescheduling the NAPI poll.
995 */
996 if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool))
997 return true;
998
999 return false;
1000 }
1001
1002 static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
1003 {
1004 u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
1005 u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
1006 (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
1007
1008 tcp->check = 0;
1009 tcp->psh = get_cqe_lro_tcppsh(cqe);
1010
1011 if (tcp_ack) {
1012 tcp->ack = 1;
1013 tcp->ack_seq = cqe->lro.ack_seq_num;
1014 tcp->window = cqe->lro.tcp_win;
1015 }
1016 }
1017
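/* After hardware LRO has coalesced several TCP segments into one SKB, rewrite
 * the IPv4/IPv6 and TCP headers (total length, TTL/hop limit, PSH/ACK bits,
 * window) and recompute the checksums so the stack sees one consistent large
 * packet.
 */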
1018 static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
1019 u32 cqe_bcnt)
1020 {
1021 struct ethhdr *eth = (struct ethhdr *)(skb->data);
1022 struct tcphdr *tcp;
1023 int network_depth = 0;
1024 __wsum check;
1025 __be16 proto;
1026 u16 tot_len;
1027 void *ip_p;
1028
1029 proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
1030
1031 tot_len = cqe_bcnt - network_depth;
1032 ip_p = skb->data + network_depth;
1033
1034 if (proto == htons(ETH_P_IP)) {
1035 struct iphdr *ipv4 = ip_p;
1036
1037 tcp = ip_p + sizeof(struct iphdr);
1038 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1039
1040 ipv4->ttl = cqe->lro.min_ttl;
1041 ipv4->tot_len = cpu_to_be16(tot_len);
1042 ipv4->check = 0;
1043 ipv4->check = ip_fast_csum((unsigned char *)ipv4,
1044 ipv4->ihl);
1045
1046 mlx5e_lro_update_tcp_hdr(cqe, tcp);
1047 check = csum_partial(tcp, tcp->doff * 4,
1048 csum_unfold((__force __sum16)cqe->check_sum));
1049 /* Almost done, don't forget the pseudo header */
1050 tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
1051 tot_len - sizeof(struct iphdr),
1052 IPPROTO_TCP, check);
1053 } else {
1054 u16 payload_len = tot_len - sizeof(struct ipv6hdr);
1055 struct ipv6hdr *ipv6 = ip_p;
1056
1057 tcp = ip_p + sizeof(struct ipv6hdr);
1058 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
1059
1060 ipv6->hop_limit = cqe->lro.min_ttl;
1061 ipv6->payload_len = cpu_to_be16(payload_len);
1062
1063 mlx5e_lro_update_tcp_hdr(cqe, tcp);
1064 check = csum_partial(tcp, tcp->doff * 4,
1065 csum_unfold((__force __sum16)cqe->check_sum));
1066 /* Almost done, don't forget the pseudo header */
1067 tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len,
1068 IPPROTO_TCP, check);
1069 }
1070 }
1071
1072 static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
1073 {
1074 struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index];
1075 u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom;
1076
1077 return page_address(last_head->page) + head_offset;
1078 }
1079
1080 static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
1081 {
1082 int udp_off = rq->hw_gro_data->fk.control.thoff;
1083 struct sk_buff *skb = rq->hw_gro_data->skb;
1084 struct udphdr *uh;
1085
1086 uh = (struct udphdr *)(skb->data + udp_off);
1087 uh->len = htons(skb->len - udp_off);
1088
1089 if (uh->check)
1090 uh->check = ~udp_v4_check(skb->len - udp_off, ipv4->saddr,
1091 ipv4->daddr, 0);
1092
1093 skb->csum_start = (unsigned char *)uh - skb->head;
1094 skb->csum_offset = offsetof(struct udphdr, check);
1095
1096 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
1097 }
1098
1099 static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6)
1100 {
1101 int udp_off = rq->hw_gro_data->fk.control.thoff;
1102 struct sk_buff *skb = rq->hw_gro_data->skb;
1103 struct udphdr *uh;
1104
1105 uh = (struct udphdr *)(skb->data + udp_off);
1106 uh->len = htons(skb->len - udp_off);
1107
1108 if (uh->check)
1109 uh->check = ~udp_v6_check(skb->len - udp_off, &ipv6->saddr,
1110 &ipv6->daddr, 0);
1111
1112 skb->csum_start = (unsigned char *)uh - skb->head;
1113 skb->csum_offset = offsetof(struct udphdr, check);
1114
1115 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
1116 }
1117
1118 static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
1119 struct tcphdr *skb_tcp_hd)
1120 {
1121 u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
1122 struct tcphdr *last_tcp_hd;
1123 void *last_hd_addr;
1124
1125 last_hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
1126 last_tcp_hd = last_hd_addr + ETH_HLEN + rq->hw_gro_data->fk.control.thoff;
1127 tcp_flag_word(skb_tcp_hd) |= tcp_flag_word(last_tcp_hd) & (TCP_FLAG_FIN | TCP_FLAG_PSH);
1128 }
1129
1130 static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4,
1131 struct mlx5_cqe64 *cqe, bool match)
1132 {
1133 int tcp_off = rq->hw_gro_data->fk.control.thoff;
1134 struct sk_buff *skb = rq->hw_gro_data->skb;
1135 struct tcphdr *tcp;
1136
1137 tcp = (struct tcphdr *)(skb->data + tcp_off);
1138 if (match)
1139 mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
1140
1141 tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr,
1142 ipv4->daddr, 0);
1143 skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
1144 if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id)
1145 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
1146
1147 skb->csum_start = (unsigned char *)tcp - skb->head;
1148 skb->csum_offset = offsetof(struct tcphdr, check);
1149
1150 if (tcp->cwr)
1151 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
1152 }
1153
1154 static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6,
1155 struct mlx5_cqe64 *cqe, bool match)
1156 {
1157 int tcp_off = rq->hw_gro_data->fk.control.thoff;
1158 struct sk_buff *skb = rq->hw_gro_data->skb;
1159 struct tcphdr *tcp;
1160
1161 tcp = (struct tcphdr *)(skb->data + tcp_off);
1162 if (match)
1163 mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
1164
1165 tcp->check = ~tcp_v6_check(skb->len - tcp_off, &ipv6->saddr,
1166 &ipv6->daddr, 0);
1167 skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
1168 skb->csum_start = (unsigned char *)tcp - skb->head;
1169 skb->csum_offset = offsetof(struct tcphdr, check);
1170
1171 if (tcp->cwr)
1172 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
1173 }
1174
1175 static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
1176 {
1177 bool is_ipv4 = (rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP));
1178 struct sk_buff *skb = rq->hw_gro_data->skb;
1179
1180 skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
1181 skb->ip_summed = CHECKSUM_PARTIAL;
1182
1183 if (is_ipv4) {
1184 int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct iphdr);
1185 struct iphdr *ipv4 = (struct iphdr *)(skb->data + nhoff);
1186 __be16 newlen = htons(skb->len - nhoff);
1187
1188 csum_replace2(&ipv4->check, ipv4->tot_len, newlen);
1189 ipv4->tot_len = newlen;
1190
1191 if (ipv4->protocol == IPPROTO_TCP)
1192 mlx5e_shampo_update_ipv4_tcp_hdr(rq, ipv4, cqe, match);
1193 else
1194 mlx5e_shampo_update_ipv4_udp_hdr(rq, ipv4);
1195 } else {
1196 int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct ipv6hdr);
1197 struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + nhoff);
1198
1199 ipv6->payload_len = htons(skb->len - nhoff - sizeof(*ipv6));
1200
1201 if (ipv6->nexthdr == IPPROTO_TCP)
1202 mlx5e_shampo_update_ipv6_tcp_hdr(rq, ipv6, cqe, match);
1203 else
1204 mlx5e_shampo_update_ipv6_udp_hdr(rq, ipv6);
1205 }
1206 }
1207
1208 static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
1209 struct sk_buff *skb)
1210 {
1211 u8 cht = cqe->rss_hash_type;
1212 int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 :
1213 (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 :
1214 PKT_HASH_TYPE_NONE;
1215 skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
1216 }
1217
1218 static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
1219 __be16 *proto)
1220 {
1221 *proto = ((struct ethhdr *)skb->data)->h_proto;
1222 *proto = __vlan_get_protocol(skb, *proto, network_depth);
1223
1224 if (*proto == htons(ETH_P_IP))
1225 return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
1226
1227 if (*proto == htons(ETH_P_IPV6))
1228 return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
1229
1230 return false;
1231 }
1232
1233 static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
1234 {
1235 int network_depth = 0;
1236 __be16 proto;
1237 void *ip;
1238 int rc;
1239
1240 if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
1241 return;
1242
1243 ip = skb->data + network_depth;
1244 rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
1245 IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
1246
1247 rq->stats->ecn_mark += !!rc;
1248 }
1249
1250 static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
1251 {
1252 void *ip_p = skb->data + network_depth;
1253
1254 return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
1255 ((struct ipv6hdr *)ip_p)->nexthdr;
1256 }
1257
1258 #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
1259
1260 #define MAX_PADDING 8
1261
1262 static void
1263 tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
1264 struct mlx5e_rq_stats *stats)
1265 {
1266 stats->csum_complete_tail_slow++;
1267 skb->csum = csum_block_add(skb->csum,
1268 skb_checksum(skb, offset, len, 0),
1269 offset);
1270 }
1271
1272 static void
1273 tail_padding_csum(struct sk_buff *skb, int offset,
1274 struct mlx5e_rq_stats *stats)
1275 {
1276 u8 tail_padding[MAX_PADDING];
1277 int len = skb->len - offset;
1278 void *tail;
1279
1280 if (unlikely(len > MAX_PADDING)) {
1281 tail_padding_csum_slow(skb, offset, len, stats);
1282 return;
1283 }
1284
1285 tail = skb_header_pointer(skb, offset, len, tail_padding);
1286 if (unlikely(!tail)) {
1287 tail_padding_csum_slow(skb, offset, len, stats);
1288 return;
1289 }
1290
1291 stats->csum_complete_tail++;
1292 skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
1293 }
1294
1295 static void
1296 mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto,
1297 struct mlx5e_rq_stats *stats)
1298 {
1299 struct ipv6hdr *ip6;
1300 struct iphdr *ip4;
1301 int pkt_len;
1302
1303 /* Fixup vlan headers, if any */
1304 if (network_depth > ETH_HLEN)
1305 /* CQE csum is calculated from the IP header and does
1306 * not cover VLAN headers (if present). This will add
1307 * the checksum manually.
1308 */
1309 skb->csum = csum_partial(skb->data + ETH_HLEN,
1310 network_depth - ETH_HLEN,
1311 skb->csum);
1312
1313 /* Fixup tail padding, if any */
1314 switch (proto) {
1315 case htons(ETH_P_IP):
1316 ip4 = (struct iphdr *)(skb->data + network_depth);
1317 pkt_len = network_depth + ntohs(ip4->tot_len);
1318 break;
1319 case htons(ETH_P_IPV6):
1320 ip6 = (struct ipv6hdr *)(skb->data + network_depth);
1321 pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
1322 break;
1323 default:
1324 return;
1325 }
1326
1327 if (likely(pkt_len >= skb->len))
1328 return;
1329
1330 tail_padding_csum(skb, pkt_len, stats);
1331 }
1332
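/* Decide how the RX checksum is reported to the stack: LRO SKBs are marked
 * CHECKSUM_UNNECESSARY; plain IPv4/IPv6 (non-SCTP) packets get
 * CHECKSUM_COMPLETE from the CQE checksum, with fixups for VLAN headers and
 * tail padding; otherwise fall back to the CQE L3/L4 OK bits or CHECKSUM_NONE.
 */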
1333 static inline void mlx5e_handle_csum(struct net_device *netdev,
1334 struct mlx5_cqe64 *cqe,
1335 struct mlx5e_rq *rq,
1336 struct sk_buff *skb,
1337 bool lro)
1338 {
1339 struct mlx5e_rq_stats *stats = rq->stats;
1340 int network_depth = 0;
1341 __be16 proto;
1342
1343 if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
1344 goto csum_none;
1345
1346 if (lro) {
1347 skb->ip_summed = CHECKSUM_UNNECESSARY;
1348 stats->csum_unnecessary++;
1349 return;
1350 }
1351
1352 /* True when explicitly set via priv flag, or XDP prog is loaded */
1353 if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) ||
1354 get_cqe_tls_offload(cqe))
1355 goto csum_unnecessary;
1356
1357 /* CQE csum doesn't cover padding octets in short ethernet
1358 * frames. And the pad field is appended prior to calculating
1359 * and appending the FCS field.
1360 *
1361 * Detecting these padded frames requires to verify and parse
1362 * IP headers, so we simply force all those small frames to be
1363 * CHECKSUM_UNNECESSARY even if they are not padded.
1364 */
1365 if (short_frame(skb->len))
1366 goto csum_unnecessary;
1367
1368 if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
1369 if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
1370 goto csum_unnecessary;
1371
1372 stats->csum_complete++;
1373 skb->ip_summed = CHECKSUM_COMPLETE;
1374 skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
1375
1376 if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state))
1377 return; /* CQE csum covers all received bytes */
1378
1379 /* csum might need some fixups ...*/
1380 mlx5e_skb_csum_fixup(skb, network_depth, proto, stats);
1381 return;
1382 }
1383
1384 csum_unnecessary:
1385 if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
1386 (cqe->hds_ip_ext & CQE_L4_OK))) {
1387 skb->ip_summed = CHECKSUM_UNNECESSARY;
1388 if (cqe_is_tunneled(cqe)) {
1389 skb->csum_level = 1;
1390 skb->encapsulation = 1;
1391 stats->csum_unnecessary_inner++;
1392 return;
1393 }
1394 stats->csum_unnecessary++;
1395 return;
1396 }
1397 csum_none:
1398 skb->ip_summed = CHECKSUM_NONE;
1399 stats->csum_none++;
1400 }
1401
1402 #define MLX5E_CE_BIT_MASK 0x80
1403
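/* Fill in SKB metadata from the CQE: offload hooks (kTLS, IPsec, MACsec),
 * LRO header fixups, HW timestamp, RX hash, VLAN tag, checksum status, ECN CE
 * marking and the protocol field.
 */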
1404 static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
1405 u32 cqe_bcnt,
1406 struct mlx5e_rq *rq,
1407 struct sk_buff *skb)
1408 {
1409 u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
1410 struct mlx5e_rq_stats *stats = rq->stats;
1411 struct net_device *netdev = rq->netdev;
1412
1413 skb->mac_len = ETH_HLEN;
1414
1415 if (unlikely(get_cqe_tls_offload(cqe)))
1416 mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt);
1417
1418 if (unlikely(mlx5_ipsec_is_rx_flow(cqe)))
1419 mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe);
1420
1421 if (unlikely(mlx5e_macsec_is_rx_flow(cqe)))
1422 mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe);
1423
1424 if (lro_num_seg > 1) {
1425 mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
1426 skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
1427 /* Subtract one since we already counted this as one
1428 * "regular" packet in mlx5e_complete_rx_cqe()
1429 */
1430 stats->packets += lro_num_seg - 1;
1431 stats->lro_packets++;
1432 stats->lro_bytes += cqe_bcnt;
1433 }
1434
1435 if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
1436 skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time,
1437 rq->clock, get_cqe_ts(cqe));
1438 skb_record_rx_queue(skb, rq->ix);
1439
1440 if (likely(netdev->features & NETIF_F_RXHASH))
1441 mlx5e_skb_set_hash(cqe, skb);
1442
1443 if (cqe_has_vlan(cqe)) {
1444 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
1445 be16_to_cpu(cqe->vlan_info));
1446 stats->removed_vlan_packets++;
1447 }
1448
1449 skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
1450
1451 mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
1452 /* checking CE bit in cqe - MSB in ml_path field */
1453 if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
1454 mlx5e_enable_ecn(rq, skb);
1455
1456 skb->protocol = eth_type_trans(skb, netdev);
1457
1458 if (unlikely(mlx5e_skb_is_multicast(skb)))
1459 stats->mcast_packets++;
1460 }
1461
1462 static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq,
1463 struct mlx5_cqe64 *cqe,
1464 u32 cqe_bcnt,
1465 struct sk_buff *skb)
1466 {
1467 struct mlx5e_rq_stats *stats = rq->stats;
1468
1469 stats->packets++;
1470 stats->gro_packets++;
1471 stats->bytes += cqe_bcnt;
1472 stats->gro_bytes += cqe_bcnt;
1473 if (NAPI_GRO_CB(skb)->count != 1)
1474 return;
1475 mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
1476 skb_reset_network_header(skb);
1477 if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) {
1478 napi_gro_receive(rq->cq.napi, skb);
1479 rq->hw_gro_data->skb = NULL;
1480 }
1481 }
1482
1483 static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
1484 struct mlx5_cqe64 *cqe,
1485 u32 cqe_bcnt,
1486 struct sk_buff *skb)
1487 {
1488 struct mlx5e_rq_stats *stats = rq->stats;
1489
1490 stats->packets++;
1491 stats->bytes += cqe_bcnt;
1492 mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
1493 }
1494
1495 static inline
1496 struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
1497 u32 frag_size, u16 headroom,
1498 u32 cqe_bcnt, u32 metasize)
1499 {
1500 struct sk_buff *skb = build_skb(va, frag_size);
1501
1502 if (unlikely(!skb)) {
1503 rq->stats->buff_alloc_err++;
1504 return NULL;
1505 }
1506
1507 skb_reserve(skb, headroom);
1508 skb_put(skb, cqe_bcnt);
1509
1510 if (metasize)
1511 skb_metadata_set(skb, metasize);
1512
1513 return skb;
1514 }
1515
1516 static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom,
1517 u32 len, struct xdp_buff *xdp)
1518 {
1519 xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
1520 xdp_prepare_buff(xdp, va, headroom, len, true);
1521 }
1522
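/* Build an SKB for a packet that fits in a single fragment: sync the buffer
 * for CPU access, run XDP if a program is attached, then wrap the page in a
 * linear SKB and take an extra page reference for recycling.
 */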
1523 static struct sk_buff *
1524 mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
1525 u32 cqe_bcnt)
1526 {
1527 union mlx5e_alloc_unit *au = wi->au;
1528 u16 rx_headroom = rq->buff.headroom;
1529 struct bpf_prog *prog;
1530 struct sk_buff *skb;
1531 u32 metasize = 0;
1532 void *va, *data;
1533 dma_addr_t addr;
1534 u32 frag_size;
1535
1536 va = page_address(au->page) + wi->offset;
1537 data = va + rx_headroom;
1538 frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
1539
1540 addr = page_pool_get_dma_addr(au->page);
1541 dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
1542 frag_size, rq->buff.map_dir);
1543 net_prefetch(data);
1544
1545 prog = rcu_dereference(rq->xdp_prog);
1546 if (prog) {
1547 struct xdp_buff xdp;
1548
1549 net_prefetchw(va); /* xdp_frame data area */
1550 mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
1551 if (mlx5e_xdp_handle(rq, au->page, prog, &xdp))
1552 return NULL; /* page/packet was consumed by XDP */
1553
1554 rx_headroom = xdp.data - xdp.data_hard_start;
1555 metasize = xdp.data - xdp.data_meta;
1556 cqe_bcnt = xdp.data_end - xdp.data;
1557 }
1558 frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
1559 skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
1560 if (unlikely(!skb))
1561 return NULL;
1562
1563 /* queue up for recycling/reuse */
1564 page_ref_inc(au->page);
1565
1566 return skb;
1567 }
1568
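/* Build an SKB for a packet spanning multiple WQE fragments: gather the
 * fragments into a multi-buffer xdp_buff, run XDP if a program is attached,
 * then convert the result into an SKB with paged fragments.
 */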
1569 static struct sk_buff *
1570 mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
1571 u32 cqe_bcnt)
1572 {
1573 struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
1574 struct mlx5e_wqe_frag_info *head_wi = wi;
1575 union mlx5e_alloc_unit *au = wi->au;
1576 u16 rx_headroom = rq->buff.headroom;
1577 struct skb_shared_info *sinfo;
1578 u32 frag_consumed_bytes;
1579 struct bpf_prog *prog;
1580 struct xdp_buff xdp;
1581 struct sk_buff *skb;
1582 dma_addr_t addr;
1583 u32 truesize;
1584 void *va;
1585
1586 va = page_address(au->page) + wi->offset;
1587 frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
1588
1589 addr = page_pool_get_dma_addr(au->page);
1590 dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
1591 rq->buff.frame0_sz, rq->buff.map_dir);
1592 net_prefetchw(va); /* xdp_frame data area */
1593 net_prefetch(va + rx_headroom);
1594
1595 mlx5e_fill_xdp_buff(rq, va, rx_headroom, frag_consumed_bytes, &xdp);
1596 sinfo = xdp_get_shared_info_from_buff(&xdp);
1597 truesize = 0;
1598
1599 cqe_bcnt -= frag_consumed_bytes;
1600 frag_info++;
1601 wi++;
1602
1603 while (cqe_bcnt) {
1604 skb_frag_t *frag;
1605
1606 au = wi->au;
1607
1608 frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
1609
1610 addr = page_pool_get_dma_addr(au->page);
1611 dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
1612 frag_consumed_bytes, rq->buff.map_dir);
1613
1614 if (!xdp_buff_has_frags(&xdp)) {
1615 /* Init on the first fragment to avoid cold cache access
1616 * when possible.
1617 */
1618 sinfo->nr_frags = 0;
1619 sinfo->xdp_frags_size = 0;
1620 xdp_buff_set_frags_flag(&xdp);
1621 }
1622
1623 frag = &sinfo->frags[sinfo->nr_frags++];
1624 __skb_frag_set_page(frag, au->page);
1625 skb_frag_off_set(frag, wi->offset);
1626 skb_frag_size_set(frag, frag_consumed_bytes);
1627
1628 if (page_is_pfmemalloc(au->page))
1629 xdp_buff_set_frag_pfmemalloc(&xdp);
1630
1631 sinfo->xdp_frags_size += frag_consumed_bytes;
1632 truesize += frag_info->frag_stride;
1633
1634 cqe_bcnt -= frag_consumed_bytes;
1635 frag_info++;
1636 wi++;
1637 }
1638
1639 au = head_wi->au;
1640
1641 prog = rcu_dereference(rq->xdp_prog);
1642 if (prog && mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
1643 if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
1644 int i;
1645
1646 for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++)
1647 mlx5e_put_rx_frag(rq, &head_wi[i], true);
1648 }
1649 return NULL; /* page/packet was consumed by XDP */
1650 }
1651
1652 skb = mlx5e_build_linear_skb(rq, xdp.data_hard_start, rq->buff.frame0_sz,
1653 xdp.data - xdp.data_hard_start,
1654 xdp.data_end - xdp.data,
1655 xdp.data - xdp.data_meta);
1656 if (unlikely(!skb))
1657 return NULL;
1658
1659 page_ref_inc(au->page);
1660
1661 if (unlikely(xdp_buff_has_frags(&xdp))) {
1662 int i;
1663
1664 /* sinfo->nr_frags is reset by build_skb, calculate again. */
1665 xdp_update_skb_shared_info(skb, wi - head_wi - 1,
1666 sinfo->xdp_frags_size, truesize,
1667 xdp_buff_is_frag_pfmemalloc(&xdp));
1668
1669 for (i = 0; i < sinfo->nr_frags; i++) {
1670 skb_frag_t *frag = &sinfo->frags[i];
1671
1672 page_ref_inc(skb_frag_page(frag));
1673 }
1674 }
1675
1676 return skb;
1677 }
1678
1679 static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
1680 {
1681 struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
1682 struct mlx5e_priv *priv = rq->priv;
1683
1684 if (cqe_syndrome_needs_recover(err_cqe->syndrome) &&
1685 !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) {
1686 mlx5e_dump_error_cqe(&rq->cq, rq->rqn, err_cqe);
1687 queue_work(priv->wq, &rq->recover_work);
1688 }
1689 }
1690
1691 static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
1692 {
1693 trigger_report(rq, cqe);
1694 rq->stats->wqe_err++;
1695 }
1696
1697 static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
1698 {
1699 struct mlx5_wq_cyc *wq = &rq->wqe.wq;
1700 struct mlx5e_wqe_frag_info *wi;
1701 struct sk_buff *skb;
1702 u32 cqe_bcnt;
1703 u16 ci;
1704
1705 ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
1706 wi = get_frag(rq, ci);
1707 cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
1708
1709 if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
1710 mlx5e_handle_rx_err_cqe(rq, cqe);
1711 goto free_wqe;
1712 }
1713
1714 skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe,
1715 mlx5e_skb_from_cqe_linear,
1716 mlx5e_skb_from_cqe_nonlinear,
1717 mlx5e_xsk_skb_from_cqe_linear,
1718 rq, wi, cqe_bcnt);
1719 if (!skb) {
1720 /* probably for XDP */
1721 if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
1722 /* do not return page to cache,
1723 * it will be returned on XDP_TX completion.
1724 */
1725 goto wq_cyc_pop;
1726 }
1727 goto free_wqe;
1728 }
1729
1730 mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
1731
1732 if (mlx5e_cqe_regb_chain(cqe))
1733 if (!mlx5e_tc_update_skb(cqe, skb)) {
1734 dev_kfree_skb_any(skb);
1735 goto free_wqe;
1736 }
1737
1738 napi_gro_receive(rq->cq.napi, skb);
1739
1740 free_wqe:
1741 mlx5e_free_rx_wqe(rq, wi, true);
1742 wq_cyc_pop:
1743 mlx5_wq_cyc_pop(wq);
1744 }
1745
1746 #ifdef CONFIG_MLX5_ESWITCH
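/* Representor variant of the legacy RQ handler: pops the VLAN tag when the
 * representor has a VLAN configured and forwards the SKB through the
 * representor TC offload receive path instead of plain GRO.
 */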
static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
        struct net_device *netdev = rq->netdev;
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *rep = rpriv->rep;
        struct mlx5_wq_cyc *wq = &rq->wqe.wq;
        struct mlx5e_wqe_frag_info *wi;
        struct sk_buff *skb;
        u32 cqe_bcnt;
        u16 ci;

        ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
        wi = get_frag(rq, ci);
        cqe_bcnt = be32_to_cpu(cqe->byte_cnt);

        if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
                mlx5e_handle_rx_err_cqe(rq, cqe);
                goto free_wqe;
        }

        skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
                              mlx5e_skb_from_cqe_linear,
                              mlx5e_skb_from_cqe_nonlinear,
                              rq, wi, cqe_bcnt);
        if (!skb) {
                /* probably for XDP */
                if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
                        /* do not return page to cache,
                         * it will be returned on XDP_TX completion.
                         */
                        goto wq_cyc_pop;
                }
                goto free_wqe;
        }

        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

        if (rep->vlan && skb_vlan_tag_present(skb))
                skb_vlan_pop(skb);

        mlx5e_rep_tc_receive(cqe, rq, skb);

free_wqe:
        mlx5e_free_rx_wqe(rq, wi, true);
wq_cyc_pop:
        mlx5_wq_cyc_pop(wq);
}

static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
        u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
        u16 wqe_id = be16_to_cpu(cqe->wqe_id);
        struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id);
        u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
        u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
        u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1);
        u32 page_idx = wqe_offset >> rq->mpwqe.page_shift;
        struct mlx5e_rx_wqe_ll *wqe;
        struct mlx5_wq_ll *wq;
        struct sk_buff *skb;
        u16 cqe_bcnt;

        wi->consumed_strides += cstrides;

        if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
                mlx5e_handle_rx_err_cqe(rq, cqe);
                goto mpwrq_cqe_out;
        }

        if (unlikely(mpwrq_is_filler_cqe(cqe))) {
                struct mlx5e_rq_stats *stats = rq->stats;

                stats->mpwqe_filler_cqes++;
                stats->mpwqe_filler_strides += cstrides;
                goto mpwrq_cqe_out;
        }

        cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);

        skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
                              mlx5e_skb_from_cqe_mpwrq_linear,
                              mlx5e_skb_from_cqe_mpwrq_nonlinear,
                              rq, wi, cqe_bcnt, head_offset, page_idx);
        if (!skb)
                goto mpwrq_cqe_out;

        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

        mlx5e_rep_tc_receive(cqe, rq, skb);

mpwrq_cqe_out:
        if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
                return;

        wq = &rq->mpwqe.wq;
        wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
        mlx5e_free_rx_mpwqe(rq, wi, true);
        mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}

const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
        .handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
        .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep,
};
#endif

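/* Attach the payload of a multi-packet WQE to the SKB as page fragments,
 * walking the allocation units page by page. truesize is stride-aligned,
 * except for SHAMPO RQs where the consumed bytes are used as-is.
 */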
static void
mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
                    union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset)
{
        net_prefetchw(skb->data);

        while (data_bcnt) {
                /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
                u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt);
                unsigned int truesize;

                if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
                        truesize = pg_consumed_bytes;
                else
                        truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));

                mlx5e_add_skb_frag(rq, skb, au, data_offset,
                                   pg_consumed_bytes, truesize);

                data_bcnt -= pg_consumed_bytes;
                data_offset = 0;
                au++;
        }
}

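/* Non-linear MPWQE path: allocate a small SKB, copy up to MLX5E_RX_MAX_HEAD
 * bytes of headers into its linear part and attach the rest of the payload
 * as page fragments.
 */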
static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
                                   u16 cqe_bcnt, u32 head_offset, u32 page_idx)
{
        union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
        u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
        u32 frag_offset = head_offset + headlen;
        u32 byte_cnt = cqe_bcnt - headlen;
        union mlx5e_alloc_unit *head_au = au;
        struct sk_buff *skb;
        dma_addr_t addr;

        skb = napi_alloc_skb(rq->cq.napi,
                             ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
        if (unlikely(!skb)) {
                rq->stats->buff_alloc_err++;
                return NULL;
        }

        net_prefetchw(skb->data);

        /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
        if (unlikely(frag_offset >= PAGE_SIZE)) {
                au++;
                frag_offset -= PAGE_SIZE;
        }

        mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset);
        /* copy header */
        addr = page_pool_get_dma_addr(head_au->page);
        mlx5e_copy_skb_header(rq, skb, head_au->page, addr,
                              head_offset, head_offset, headlen);
        /* skb linear part was allocated with headlen and aligned to long */
        skb->tail += headlen;
        skb->len += headlen;

        return skb;
}

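/* Linear MPWQE path: the whole packet fits in a single stride, so run XDP
 * (if attached) directly on the stride and build the SKB around it, taking
 * an extra page reference so the page can be recycled.
 */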
static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
                                u16 cqe_bcnt, u32 head_offset, u32 page_idx)
{
        union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
        u16 rx_headroom = rq->buff.headroom;
        struct bpf_prog *prog;
        struct sk_buff *skb;
        u32 metasize = 0;
        void *va, *data;
        dma_addr_t addr;
        u32 frag_size;

        /* Check packet size. Note LRO doesn't use linear SKB */
        if (unlikely(cqe_bcnt > rq->hw_mtu)) {
                rq->stats->oversize_pkts_sw_drop++;
                return NULL;
        }

        va = page_address(au->page) + head_offset;
        data = va + rx_headroom;
        frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);

        addr = page_pool_get_dma_addr(au->page);
        dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset,
                                      frag_size, rq->buff.map_dir);
        net_prefetch(data);

        prog = rcu_dereference(rq->xdp_prog);
        if (prog) {
                struct xdp_buff xdp;

                net_prefetchw(va); /* xdp_frame data area */
                mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
                if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
                        if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
                                __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
                        return NULL; /* page/packet was consumed by XDP */
                }

                rx_headroom = xdp.data - xdp.data_hard_start;
                metasize = xdp.data - xdp.data_meta;
                cqe_bcnt = xdp.data_end - xdp.data;
        }
        frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
        skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
        if (unlikely(!skb))
                return NULL;

        /* queue up for recycling/reuse */
        page_ref_inc(au->page);

        return skb;
}

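/* Build the SKB for a SHAMPO header: small headers are built in place around
 * the header buffer, large headers are copied into a freshly allocated SKB.
 */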
static struct sk_buff *
mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
                          struct mlx5_cqe64 *cqe, u16 header_index)
{
        struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index];
        u16 head_offset = head->addr & (PAGE_SIZE - 1);
        u16 head_size = cqe->shampo.header_size;
        u16 rx_headroom = rq->buff.headroom;
        struct sk_buff *skb = NULL;
        void *hdr, *data;
        u32 frag_size;

        hdr = page_address(head->page) + head_offset;
        data = hdr + rx_headroom;
        frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size);

        if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
                /* build SKB around header */
                dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir);
                prefetchw(hdr);
                prefetch(data);
                skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0);

                if (unlikely(!skb))
                        return NULL;

                /* queue up for recycling/reuse */
                page_ref_inc(head->page);

        } else {
                /* allocate SKB and copy header for large header */
                rq->stats->gro_large_hds++;
                skb = napi_alloc_skb(rq->cq.napi,
                                     ALIGN(head_size, sizeof(long)));
                if (unlikely(!skb)) {
                        rq->stats->buff_alloc_err++;
                        return NULL;
                }

                prefetchw(skb->data);
                mlx5e_copy_skb_header(rq, skb, head->page, head->addr,
                                      head_offset + rx_headroom,
                                      rx_headroom, head_size);
                /* skb linear part was allocated with headlen and aligned to long */
                skb->tail += head_size;
                skb->len += head_size;
        }
        return skb;
}

static void
mlx5e_shampo_align_fragment(struct sk_buff *skb, u8 log_stride_sz)
{
        skb_frag_t *last_frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
        unsigned int frag_size = skb_frag_size(last_frag);
        unsigned int frag_truesize;

        frag_truesize = ALIGN(frag_size, BIT(log_stride_sz));
        skb->truesize += frag_truesize - frag_size;
}

static void
mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
{
        struct sk_buff *skb = rq->hw_gro_data->skb;
        struct mlx5e_rq_stats *stats = rq->stats;

        stats->gro_skbs++;
        if (likely(skb_shinfo(skb)->nr_frags))
                mlx5e_shampo_align_fragment(skb, rq->mpwqe.log_stride_sz);
        if (NAPI_GRO_CB(skb)->count > 1)
                mlx5e_shampo_update_hdr(rq, cqe, match);
        napi_gro_receive(rq->cq.napi, skb);
        rq->hw_gro_data->skb = NULL;
}

static bool
mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt)
{
        int nr_frags = skb_shinfo(skb)->nr_frags;

        return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE;
}

static void
mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
{
        struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
        u64 addr = shampo->info[header_index].addr;

        if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
                shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
                mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true);
        }
        bitmap_clear(shampo->bitmap, header_index, 1);
}

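/* SHAMPO (HW GRO) completion handler: headers and payload arrive separately.
 * Matching payload CQEs are aggregated onto rq->hw_gro_data->skb until the
 * session is flushed (no match, no room, flush bit, or end of the NAPI poll).
 */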
static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
        u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
        u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
        u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset);
        u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
        u32 data_offset = wqe_offset & (PAGE_SIZE - 1);
        u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
        u16 wqe_id = be16_to_cpu(cqe->wqe_id);
        u32 page_idx = wqe_offset >> PAGE_SHIFT;
        u16 head_size = cqe->shampo.header_size;
        struct sk_buff **skb = &rq->hw_gro_data->skb;
        bool flush = cqe->shampo.flush;
        bool match = cqe->shampo.match;
        struct mlx5e_rq_stats *stats = rq->stats;
        struct mlx5e_rx_wqe_ll *wqe;
        union mlx5e_alloc_unit *au;
        struct mlx5e_mpw_info *wi;
        struct mlx5_wq_ll *wq;

        wi = mlx5e_get_mpw_info(rq, wqe_id);
        wi->consumed_strides += cstrides;

        if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
                mlx5e_handle_rx_err_cqe(rq, cqe);
                goto mpwrq_cqe_out;
        }

        if (unlikely(mpwrq_is_filler_cqe(cqe))) {
                stats->mpwqe_filler_cqes++;
                stats->mpwqe_filler_strides += cstrides;
                goto mpwrq_cqe_out;
        }

        stats->gro_match_packets += match;

        if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) {
                match = false;
                mlx5e_shampo_flush_skb(rq, cqe, match);
        }

        if (!*skb) {
                if (likely(head_size))
                        *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
                else
                        *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset,
                                                                  page_idx);
                if (unlikely(!*skb))
                        goto free_hd_entry;

                NAPI_GRO_CB(*skb)->count = 1;
                skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size;
        } else {
                NAPI_GRO_CB(*skb)->count++;
                if (NAPI_GRO_CB(*skb)->count == 2 &&
                    rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)) {
                        void *hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
                        int nhoff = ETH_HLEN + rq->hw_gro_data->fk.control.thoff -
                                    sizeof(struct iphdr);
                        struct iphdr *iph = (struct iphdr *)(hd_addr + nhoff);

                        rq->hw_gro_data->second_ip_id = ntohs(iph->id);
                }
        }

        if (likely(head_size)) {
                au = &wi->alloc_units[page_idx];
                mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset);
        }

        mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
        if (flush)
                mlx5e_shampo_flush_skb(rq, cqe, match);
free_hd_entry:
        mlx5e_free_rx_shampo_hd_entry(rq, header_index);
mpwrq_cqe_out:
        if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
                return;

        wq = &rq->mpwqe.wq;
        wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
        mlx5e_free_rx_mpwqe(rq, wi, true);
        mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}

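/* Completion handler for striding RQ (MPWQE): a WQE is released only after
 * all of its strides have been consumed.
 */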
static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
        u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
        u16 wqe_id = be16_to_cpu(cqe->wqe_id);
        struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id);
        u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
        u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
        u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1);
        u32 page_idx = wqe_offset >> rq->mpwqe.page_shift;
        struct mlx5e_rx_wqe_ll *wqe;
        struct mlx5_wq_ll *wq;
        struct sk_buff *skb;
        u16 cqe_bcnt;

        wi->consumed_strides += cstrides;

        if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
                mlx5e_handle_rx_err_cqe(rq, cqe);
                goto mpwrq_cqe_out;
        }

        if (unlikely(mpwrq_is_filler_cqe(cqe))) {
                struct mlx5e_rq_stats *stats = rq->stats;

                stats->mpwqe_filler_cqes++;
                stats->mpwqe_filler_strides += cstrides;
                goto mpwrq_cqe_out;
        }

        cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);

        skb = INDIRECT_CALL_3(rq->mpwqe.skb_from_cqe_mpwrq,
                              mlx5e_skb_from_cqe_mpwrq_linear,
                              mlx5e_skb_from_cqe_mpwrq_nonlinear,
                              mlx5e_xsk_skb_from_cqe_mpwrq_linear,
                              rq, wi, cqe_bcnt, head_offset, page_idx);
        if (!skb)
                goto mpwrq_cqe_out;

        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

        if (mlx5e_cqe_regb_chain(cqe))
                if (!mlx5e_tc_update_skb(cqe, skb)) {
                        dev_kfree_skb_any(skb);
                        goto mpwrq_cqe_out;
                }

        napi_gro_receive(rq->cq.napi, skb);

mpwrq_cqe_out:
        if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
                return;

        wq = &rq->mpwqe.wq;
        wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
        mlx5e_free_rx_mpwqe(rq, wi, true);
        mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}

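/* RX NAPI poll: drain up to @budget CQEs, decompressing compressed CQE
 * sessions on the fly, then flush any pending HW GRO SKB, complete XDP
 * transmissions and update the CQ doorbell record.
 */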
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
        struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
        struct mlx5_cqwq *cqwq = &cq->wq;
        struct mlx5_cqe64 *cqe;
        int work_done = 0;

        if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
                return 0;

        if (rq->cqd.left) {
                work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
                if (work_done >= budget)
                        goto out;
        }

        cqe = mlx5_cqwq_get_cqe(cqwq);
        if (!cqe) {
                if (unlikely(work_done))
                        goto out;
                return 0;
        }

        do {
                if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
                        work_done +=
                                mlx5e_decompress_cqes_start(rq, cqwq,
                                                            budget - work_done);
                        continue;
                }

                mlx5_cqwq_pop(cqwq);

                INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
                                mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
                                rq, cqe);
        } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));

out:
        if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb)
                mlx5e_shampo_flush_skb(rq, NULL, false);

        if (rcu_access_pointer(rq->xdp_prog))
                mlx5e_xdp_rx_poll_complete(rq);

        mlx5_cqwq_update_db_record(cqwq);

        /* ensure cq space is freed before enabling more cqes */
        wmb();

        return work_done;
}

#ifdef CONFIG_MLX5_CORE_IPOIB

#define MLX5_IB_GRH_SGID_OFFSET 8
#define MLX5_IB_GRH_DGID_OFFSET 24
#define MLX5_GID_SIZE 16

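/* IPoIB RX completion: resolve the child netdev from the QPN, classify the
 * packet from the GRH, drop self-originated multicast, and set up checksum,
 * timestamp and the pseudo header before handing the SKB to the stack.
 */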
static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
                                         struct mlx5_cqe64 *cqe,
                                         u32 cqe_bcnt,
                                         struct sk_buff *skb)
{
        struct hwtstamp_config *tstamp;
        struct mlx5e_rq_stats *stats;
        struct net_device *netdev;
        struct mlx5e_priv *priv;
        char *pseudo_header;
        u32 flags_rqpn;
        u32 qpn;
        u8 *dgid;
        u8 g;

        qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff;
        netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn);

        /* No mapping present, cannot process SKB. This might happen if a child
         * interface is going down while having unprocessed CQEs on parent RQ
         */
        if (unlikely(!netdev)) {
                /* TODO: add drop counters support */
                skb->dev = NULL;
                pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn);
                return;
        }

        priv = mlx5i_epriv(netdev);
        tstamp = &priv->tstamp;
        stats = &priv->channel_stats[rq->ix]->rq;

        flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
        g = (flags_rqpn >> 28) & 3;
        dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
        if ((!g) || dgid[0] != 0xff)
                skb->pkt_type = PACKET_HOST;
        else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0)
                skb->pkt_type = PACKET_BROADCAST;
        else
                skb->pkt_type = PACKET_MULTICAST;

        /* Drop packets that this interface sent, ie multicast packets
         * that the HCA has replicated.
         */
        if (g && (qpn == (flags_rqpn & 0xffffff)) &&
            (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET,
                    MLX5_GID_SIZE) == 0)) {
                skb->dev = NULL;
                return;
        }

        skb_pull(skb, MLX5_IB_GRH_BYTES);

        skb->protocol = *((__be16 *)(skb->data));

        if (netdev->features & NETIF_F_RXCSUM) {
                skb->ip_summed = CHECKSUM_COMPLETE;
                skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
                stats->csum_complete++;
        } else {
                skb->ip_summed = CHECKSUM_NONE;
                stats->csum_none++;
        }

        if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
                skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time,
                                                                  rq->clock, get_cqe_ts(cqe));
        skb_record_rx_queue(skb, rq->ix);

        if (likely(netdev->features & NETIF_F_RXHASH))
                mlx5e_skb_set_hash(cqe, skb);

        /* 20 bytes of ipoib header and 4 for encap existing */
        pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN);
        memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN);
        skb_reset_mac_header(skb);
        skb_pull(skb, MLX5_IPOIB_HARD_LEN);

        skb->dev = netdev;

        stats->packets++;
        stats->bytes += cqe_bcnt;
}

static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
        struct mlx5_wq_cyc *wq = &rq->wqe.wq;
        struct mlx5e_wqe_frag_info *wi;
        struct sk_buff *skb;
        u32 cqe_bcnt;
        u16 ci;

        ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
        wi = get_frag(rq, ci);
        cqe_bcnt = be32_to_cpu(cqe->byte_cnt);

        if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
                rq->stats->wqe_err++;
                goto wq_free_wqe;
        }

        skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
                              mlx5e_skb_from_cqe_linear,
                              mlx5e_skb_from_cqe_nonlinear,
                              rq, wi, cqe_bcnt);
        if (!skb)
                goto wq_free_wqe;

        mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
        if (unlikely(!skb->dev)) {
                dev_kfree_skb_any(skb);
                goto wq_free_wqe;
        }
        napi_gro_receive(rq->cq.napi, skb);

wq_free_wqe:
        mlx5e_free_rx_wqe(rq, wi, true);
        mlx5_wq_cyc_pop(wq);
}

const struct mlx5e_rx_handlers mlx5i_rx_handlers = {
        .handle_rx_cqe = mlx5i_handle_rx_cqe,
        .handle_rx_cqe_mpwqe = NULL, /* Not supported */
};
#endif /* CONFIG_MLX5_CORE_IPOIB */

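/* Select the SKB-build and CQE handlers for the RQ according to its WQ type,
 * the packet-merge mode and whether it is an XSK RQ.
 */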
int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk)
{
        struct net_device *netdev = rq->netdev;
        struct mlx5_core_dev *mdev = rq->mdev;
        struct mlx5e_priv *priv = rq->priv;

        switch (rq->wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
                        mlx5e_xsk_skb_from_cqe_mpwrq_linear :
                        mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
                                mlx5e_skb_from_cqe_mpwrq_linear :
                                mlx5e_skb_from_cqe_mpwrq_nonlinear;
                rq->post_wqes = mlx5e_post_rx_mpwqes;
                rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;

                if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
                        rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo;
                        if (!rq->handle_rx_cqe) {
                                netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n");
                                return -EINVAL;
                        }
                } else {
                        rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
                        if (!rq->handle_rx_cqe) {
                                netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
                                return -EINVAL;
                        }
                }

                break;
        default: /* MLX5_WQ_TYPE_CYCLIC */
                rq->wqe.skb_from_cqe = xsk ?
                        mlx5e_xsk_skb_from_cqe_linear :
                        mlx5e_rx_is_linear_skb(mdev, params, NULL) ?
                                mlx5e_skb_from_cqe_linear :
                                mlx5e_skb_from_cqe_nonlinear;
                rq->post_wqes = mlx5e_post_rx_wqes;
                rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
                rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe;
                if (!rq->handle_rx_cqe) {
                        netdev_err(netdev, "RX handler of RQ is not set\n");
                        return -EINVAL;
                }
        }

        return 0;
}

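/* Trap RQ handler: packets are not delivered to the stack but reported to
 * devlink with the trap id taken from the CQE flow tag.
 */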
static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
        struct mlx5e_priv *priv = netdev_priv(rq->netdev);
        struct mlx5_wq_cyc *wq = &rq->wqe.wq;
        struct mlx5e_wqe_frag_info *wi;
        struct devlink_port *dl_port;
        struct sk_buff *skb;
        u32 cqe_bcnt;
        u16 trap_id;
        u16 ci;

        trap_id = get_cqe_flow_tag(cqe);
        ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
        wi = get_frag(rq, ci);
        cqe_bcnt = be32_to_cpu(cqe->byte_cnt);

        if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
                rq->stats->wqe_err++;
                goto free_wqe;
        }

        skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe_bcnt);
        if (!skb)
                goto free_wqe;

        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
        skb_push(skb, ETH_HLEN);

        dl_port = mlx5e_devlink_get_dl_port(priv);
        mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port);
        dev_kfree_skb_any(skb);

free_wqe:
        mlx5e_free_rx_wqe(rq, wi, false);
        mlx5_wq_cyc_pop(wq);
}

void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params)
{
        rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(rq->mdev, params, NULL) ?
                               mlx5e_skb_from_cqe_linear :
                               mlx5e_skb_from_cqe_nonlinear;
        rq->post_wqes = mlx5e_post_rx_wqes;
        rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
        rq->handle_rx_cqe = mlx5e_trap_handle_rx_cqe;
}
