1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include "rx.h"
5 #include "en/xdp.h"
6 #include <net/xdp_sock_drv.h>
7 #include <linux/filter.h>
8 
9 /* RX data path */
10 
mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq * rq,u16 ix)11 int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
12 {
13 	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
14 	struct mlx5e_icosq *icosq = rq->icosq;
15 	struct mlx5_wq_cyc *wq = &icosq->wq;
16 	struct mlx5e_umr_wqe *umr_wqe;
17 	int batch, i;
18 	u32 offset; /* 17-bit value with MTT. */
19 	u16 pi;
20 
21 	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
22 		goto err;
23 
24 	BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
25 	batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
26 				     rq->mpwqe.pages_per_wqe);
27 
28 	/* If batch < pages_per_wqe, either:
29 	 * 1. Some (or all) descriptors were invalid.
30 	 * 2. dma_need_sync is true, and it fell back to allocating one frame.
31 	 * In either case, try to continue allocating frames one by one, until
32 	 * the first error, which will mean there are no more valid descriptors.
33 	 */
34 	for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
35 		wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
36 		if (unlikely(!wi->alloc_units[batch].xsk))
37 			goto err_reuse_batch;
38 	}
39 
40 	pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
41 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
42 	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
43 
44 	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
45 		for (i = 0; i < batch; i++) {
46 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
47 
48 			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
49 				.ptag = cpu_to_be64(addr | MLX5_EN_WR),
50 			};
51 		}
52 	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
53 		for (i = 0; i < batch; i++) {
54 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
55 
56 			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
57 				.key = rq->mkey_be,
58 				.va = cpu_to_be64(addr),
59 			};
60 		}
61 	} else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
62 		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);
63 
64 		for (i = 0; i < batch; i++) {
65 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
66 
67 			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
68 				.key = rq->mkey_be,
69 				.va = cpu_to_be64(addr),
70 			};
71 			umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
72 				.key = rq->mkey_be,
73 				.va = cpu_to_be64(addr + mapping_size),
74 			};
75 			umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
76 				.key = rq->mkey_be,
77 				.va = cpu_to_be64(addr + mapping_size * 2),
78 			};
79 			umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
80 				.key = rq->mkey_be,
81 				.va = cpu_to_be64(rq->wqe_overflow.addr),
82 			};
83 		}
84 	} else {
85 		__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
86 					      rq->xsk_pool->chunk_size);
87 		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);
88 
89 		for (i = 0; i < batch; i++) {
90 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
91 
92 			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
93 				.key = rq->mkey_be,
94 				.va = cpu_to_be64(addr),
95 				.bcount = frame_size,
96 			};
97 			umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
98 				.key = rq->mkey_be,
99 				.va = cpu_to_be64(rq->wqe_overflow.addr),
100 				.bcount = pad_size,
101 			};
102 		}
103 	}
104 
105 	bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
106 	wi->consumed_strides = 0;
107 
108 	umr_wqe->ctrl.opmod_idx_opcode =
109 		cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);
110 
111 	/* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
112 	offset = ix * rq->mpwqe.mtts_per_wqe;
113 	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
114 		offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
115 	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
116 		offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
117 	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
118 		offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
119 	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
120 
121 	icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
122 		.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
123 		.num_wqebbs = rq->mpwqe.umr_wqebbs,
124 		.umr.rq = rq,
125 	};
126 
127 	icosq->pc += rq->mpwqe.umr_wqebbs;
128 
129 	icosq->doorbell_cseg = &umr_wqe->ctrl;
130 
131 	return 0;
132 
133 err_reuse_batch:
134 	while (--batch >= 0)
135 		xsk_buff_free(wi->alloc_units[batch].xsk);
136 
137 err:
138 	rq->stats->buff_alloc_err++;
139 	return -ENOMEM;
140 }
141 
/* Batch-allocate up to @wqe_bulk XSK frames for the cyclic RQ, starting at
 * index @ix, and write each frame's DMA address (plus rq->buff.headroom) into
 * the matching RX WQE.
 *
 * Return: the number of frames actually allocated, which may be less than
 * @wqe_bulk.
 */
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *ring = &rq->wqe.wq;
	struct xdp_buff **slots;
	u32 until_wrap, got;
	int k;

	/* mlx5e_init_frags_partition sets up a 1:1 mapping between
	 * rq->wqe.frags and rq->wqe.alloc_units, so XDP buffers can be
	 * allocated directly into alloc_units, viewed as an array of
	 * xdp_buff pointers.
	 */
	BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
		     sizeof(rq->wqe.alloc_units[0].xsk));
	slots = (struct xdp_buff **)rq->wqe.alloc_units;

	/* Number of entries from @ix up to the end of the ring. */
	until_wrap = mlx5_wq_cyc_get_size(ring) - ix;

	if (wqe_bulk > until_wrap) {
		/* The request wraps: fill the tail first, and continue from
		 * the start of the ring only if the tail was filled fully.
		 */
		got = xsk_buff_alloc_batch(rq->xsk_pool, slots + ix, until_wrap);
		if (likely(got == until_wrap))
			got += xsk_buff_alloc_batch(rq->xsk_pool, slots,
						    wqe_bulk - until_wrap);
	} else {
		got = xsk_buff_alloc_batch(rq->xsk_pool, slots + ix, wqe_bulk);
	}

	for (k = 0; k < got; k++) {
		int idx = mlx5_wq_cyc_ctr2ix(ring, ix + k);
		struct mlx5e_rx_wqe_cyc *desc = mlx5_wq_cyc_get_wqe(ring, idx);
		/* Assumes log_num_frags == 0. */
		struct mlx5e_wqe_frag_info *fi = &rq->wqe.frags[idx];
		dma_addr_t dma = xsk_buff_xdp_get_frame_dma(fi->au->xsk);

		desc->data[0].addr = cpu_to_be64(dma + rq->buff.headroom);
	}

	return got;
}
181 
/* Allocate XSK frames one at a time for @wqe_bulk WQEs of the cyclic RQ,
 * starting at index @ix, and write each frame's DMA address (plus
 * rq->buff.headroom) into its RX WQE.
 *
 * Return: @wqe_bulk if every allocation succeeded, otherwise the number of
 * WQEs that were filled before the first allocation failure.
 */
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *ring = &rq->wqe.wq;
	int filled = 0;

	while (filled < wqe_bulk) {
		int idx = mlx5_wq_cyc_ctr2ix(ring, ix + filled);
		struct mlx5e_rx_wqe_cyc *desc = mlx5_wq_cyc_get_wqe(ring, idx);
		/* Assumes log_num_frags == 0. */
		struct mlx5e_wqe_frag_info *fi = &rq->wqe.frags[idx];
		struct xdp_buff *buf = xsk_buff_alloc(rq->xsk_pool);
		dma_addr_t dma;

		/* The slot is written even on failure (it then holds NULL). */
		fi->au->xsk = buf;
		if (unlikely(!buf))
			return filled;

		dma = xsk_buff_xdp_get_frame_dma(buf);
		desc->data[0].addr = cpu_to_be64(dma + rq->buff.headroom);
		filled++;
	}

	return wqe_bulk;
}
207 
mlx5e_xsk_construct_skb(struct mlx5e_rq * rq,struct xdp_buff * xdp)208 static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
209 {
210 	u32 totallen = xdp->data_end - xdp->data_meta;
211 	u32 metalen = xdp->data - xdp->data_meta;
212 	struct sk_buff *skb;
213 
214 	skb = napi_alloc_skb(rq->cq.napi, totallen);
215 	if (unlikely(!skb)) {
216 		rq->stats->buff_alloc_err++;
217 		return NULL;
218 	}
219 
220 	skb_put_data(skb, xdp->data_meta, totallen);
221 
222 	if (metalen) {
223 		skb_metadata_set(skb, metalen);
224 		__skb_pull(skb, metalen);
225 	}
226 
227 	return skb;
228 }
229 
/* Handle one received packet that lives in XSK frame @page_idx of the
 * multi-packet WQE @wi: run the XDP program, if one is attached, and
 * otherwise (or on XDP_PASS) copy the packet into a new SKB.
 *
 * Return: the SKB to pass up the stack, or NULL when the packet was dropped as
 * oversized, was consumed by XDP, or the SKB allocation failed.
 */
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
						    struct mlx5e_mpw_info *wi,
						    u16 cqe_bcnt,
						    u32 head_offset,
						    u32 page_idx)
{
	struct xdp_buff *frame = wi->alloc_units[page_idx].xsk;
	struct bpf_prog *xdp_prog;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
		rq->stats->oversize_pkts_sw_drop++;
		return NULL;
	}

	/* head_offset is unused here: xdp->data and the DMA address already
	 * point at the right place. Moreover, in the current implementation
	 * UMR pages are mapped to XSK frames, so head_offset is expected to
	 * be 0 at all times.
	 */
	WARN_ON_ONCE(head_offset);

	xsk_buff_set_size(frame, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(frame, rq->xsk_pool);
	net_prefetch(frame->data);

	/* Possible flows:
	 * - XDP_REDIRECT to XSKMAP:
	 *   The page is owned by the userspace from now.
	 * - XDP_TX and other XDP_REDIRECTs:
	 *   The page was returned by ZCA and recycled.
	 * - XDP_DROP:
	 *   Recycle the page.
	 * - XDP_PASS:
	 *   Allocate an SKB, copy the data and recycle the page.
	 *
	 * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
	 * size is the same as the Driver RX Ring's size, and pages for WQEs are
	 * allocated first from the Reuse Ring, so it has enough space.
	 */
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (unlikely(!xdp_prog || !mlx5e_xdp_handle(rq, NULL, xdp_prog, frame))) {
		/* No program or XDP_PASS: copy the data from the UMEM to a
		 * new SKB and reuse the frame. NULL is returned if the SKB
		 * allocation fails.
		 */
		return mlx5e_xsk_construct_skb(rq, frame);
	}

	/* The page/packet was consumed by XDP. If the handler raised
	 * MLX5E_RQ_FLAG_XDP_XMIT, clear it and mark this page in the WQE's
	 * bitmap instead.
	 */
	if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
		__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */

	return NULL;
}
283 
/* Handle one received packet held in the XSK frame of the cyclic-RQ fragment
 * @wi: run the XDP program, if one is attached, and otherwise (or on
 * XDP_PASS) copy the packet into a new SKB.
 *
 * Return: the SKB to pass up the stack, or NULL when the packet was consumed
 * by XDP or the SKB allocation failed.
 */
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
					      struct mlx5e_wqe_frag_info *wi,
					      u32 cqe_bcnt)
{
	struct xdp_buff *frame = wi->au->xsk;
	struct bpf_prog *xdp_prog;

	/* wi->offset is unused here: xdp->data and the DMA address already
	 * point at the right place. Moreover, the XSK allocator hands out
	 * frames per packet rather than pages, so wi->offset is expected to
	 * be 0 at all times.
	 */
	WARN_ON_ONCE(wi->offset);

	xsk_buff_set_size(frame, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(frame, rq->xsk_pool);
	net_prefetch(frame->data);

	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (unlikely(!xdp_prog || !mlx5e_xdp_handle(rq, NULL, xdp_prog, frame))) {
		/* No program or XDP_PASS: copy the data from the UMEM to a
		 * new SKB. The frame reuse will be handled by
		 * mlx5e_free_rx_wqe. NULL is returned if the SKB allocation
		 * fails.
		 */
		return mlx5e_xsk_construct_skb(rq, frame);
	}

	return NULL; /* page/packet was consumed by XDP */
}
312