1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
3
4 #include <rdma/ib_umem_odp.h>
5 #include "mlx5_ib.h"
6 #include "umr.h"
7 #include "wr.h"
8
/*
 * Last-resort XLT buffer, handed out by mlx5r_umr_alloc_xlt() when every page
 * allocation attempt has failed. It is a pointer rather than a static array
 * because dma_map_single() doesn't work on kernel module memory.
 *
 * xlt_emergency_page_mutex is taken when the page is handed out and dropped
 * by mlx5r_umr_free_xlt(), so only one user owns the page at a time.
 */
static DEFINE_MUTEX(xlt_emergency_page_mutex);
void *xlt_emergency_page;
15
get_umr_enable_mr_mask(void)16 static __be64 get_umr_enable_mr_mask(void)
17 {
18 u64 result;
19
20 result = MLX5_MKEY_MASK_KEY |
21 MLX5_MKEY_MASK_FREE;
22
23 return cpu_to_be64(result);
24 }
25
get_umr_disable_mr_mask(void)26 static __be64 get_umr_disable_mr_mask(void)
27 {
28 u64 result;
29
30 result = MLX5_MKEY_MASK_FREE;
31
32 return cpu_to_be64(result);
33 }
34
get_umr_update_translation_mask(void)35 static __be64 get_umr_update_translation_mask(void)
36 {
37 u64 result;
38
39 result = MLX5_MKEY_MASK_LEN |
40 MLX5_MKEY_MASK_PAGE_SIZE |
41 MLX5_MKEY_MASK_START_ADDR;
42
43 return cpu_to_be64(result);
44 }
45
get_umr_update_access_mask(struct mlx5_ib_dev * dev)46 static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
47 {
48 u64 result;
49
50 result = MLX5_MKEY_MASK_LR |
51 MLX5_MKEY_MASK_LW |
52 MLX5_MKEY_MASK_RR |
53 MLX5_MKEY_MASK_RW;
54
55 if (MLX5_CAP_GEN(dev->mdev, atomic))
56 result |= MLX5_MKEY_MASK_A;
57
58 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
59 result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
60
61 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
62 result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
63
64 return cpu_to_be64(result);
65 }
66
get_umr_update_pd_mask(void)67 static __be64 get_umr_update_pd_mask(void)
68 {
69 u64 result;
70
71 result = MLX5_MKEY_MASK_PD;
72
73 return cpu_to_be64(result);
74 }
75
/*
 * Validate a (CPU-endian) mkey mask against the device capabilities.
 *
 * Returns -EPERM when the mask selects a field the device does not allow to
 * be changed through UMR (page size, atomic, relaxed ordering write/read),
 * and 0 otherwise.
 */
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
	struct mlx5_core_dev *mdev = dev->mdev;

	/* Each capability is consulted only if the matching mask bit is set. */
	if (((mask & MLX5_MKEY_MASK_PAGE_SIZE) &&
	     MLX5_CAP_GEN(mdev, umr_modify_entity_size_disabled)) ||
	    ((mask & MLX5_MKEY_MASK_A) &&
	     MLX5_CAP_GEN(mdev, umr_modify_atomic_disabled)) ||
	    ((mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE) &&
	     !MLX5_CAP_GEN(mdev, relaxed_ordering_write_umr)) ||
	    ((mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ) &&
	     !MLX5_CAP_GEN(mdev, relaxed_ordering_read_umr)))
		return -EPERM;

	return 0;
}
96
/*
 * Send queue depth requested for the UMR QP; also the initial count of the
 * umrc semaphore that throttles concurrent UMR posts.
 */
enum { MAX_UMR_WR = 128 };
100
mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev * dev,struct ib_qp * qp)101 static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
102 {
103 struct ib_qp_attr attr = {};
104 int ret;
105
106 attr.qp_state = IB_QPS_INIT;
107 attr.port_num = 1;
108 ret = ib_modify_qp(qp, &attr,
109 IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
110 if (ret) {
111 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
112 return ret;
113 }
114
115 memset(&attr, 0, sizeof(attr));
116 attr.qp_state = IB_QPS_RTR;
117
118 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
119 if (ret) {
120 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
121 return ret;
122 }
123
124 memset(&attr, 0, sizeof(attr));
125 attr.qp_state = IB_QPS_RTS;
126 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
127 if (ret) {
128 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
129 return ret;
130 }
131
132 return 0;
133 }
134
mlx5r_umr_resource_init(struct mlx5_ib_dev * dev)135 int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
136 {
137 struct ib_qp_init_attr init_attr = {};
138 struct ib_pd *pd;
139 struct ib_cq *cq;
140 struct ib_qp *qp;
141 int ret;
142
143 pd = ib_alloc_pd(&dev->ib_dev, 0);
144 if (IS_ERR(pd)) {
145 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
146 return PTR_ERR(pd);
147 }
148
149 cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
150 if (IS_ERR(cq)) {
151 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
152 ret = PTR_ERR(cq);
153 goto destroy_pd;
154 }
155
156 init_attr.send_cq = cq;
157 init_attr.recv_cq = cq;
158 init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
159 init_attr.cap.max_send_wr = MAX_UMR_WR;
160 init_attr.cap.max_send_sge = 1;
161 init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
162 init_attr.port_num = 1;
163 qp = ib_create_qp(pd, &init_attr);
164 if (IS_ERR(qp)) {
165 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
166 ret = PTR_ERR(qp);
167 goto destroy_cq;
168 }
169
170 ret = mlx5r_umr_qp_rst2rts(dev, qp);
171 if (ret)
172 goto destroy_qp;
173
174 dev->umrc.qp = qp;
175 dev->umrc.cq = cq;
176 dev->umrc.pd = pd;
177
178 sema_init(&dev->umrc.sem, MAX_UMR_WR);
179 mutex_init(&dev->umrc.lock);
180 dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
181
182 return 0;
183
184 destroy_qp:
185 ib_destroy_qp(qp);
186 destroy_cq:
187 ib_free_cq(cq);
188 destroy_pd:
189 ib_dealloc_pd(pd);
190 return ret;
191 }
192
mlx5r_umr_resource_cleanup(struct mlx5_ib_dev * dev)193 void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
194 {
195 if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
196 return;
197 ib_destroy_qp(dev->umrc.qp);
198 ib_free_cq(dev->umrc.cq);
199 ib_dealloc_pd(dev->umrc.pd);
200 }
201
mlx5r_umr_recover(struct mlx5_ib_dev * dev)202 static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
203 {
204 struct umr_common *umrc = &dev->umrc;
205 struct ib_qp_attr attr;
206 int err;
207
208 attr.qp_state = IB_QPS_RESET;
209 err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
210 if (err) {
211 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
212 goto err;
213 }
214
215 err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
216 if (err)
217 goto err;
218
219 umrc->state = MLX5_UMR_STATE_ACTIVE;
220 return 0;
221
222 err:
223 umrc->state = MLX5_UMR_STATE_ERR;
224 return err;
225 }
226
/*
 * Copy one UMR WQE into the send queue of @ibqp and ring the doorbell.
 *
 * @mkey is passed (big-endian) to mlx5r_begin_wqe(); @cqe is stored as the
 * work request id of the WQE. When @with_data is false the trailing data
 * segment of @wqe is not copied.
 *
 * Returns -EIO if the device is in internal error state, the error from
 * mlx5r_begin_wqe() if it fails, and 0 otherwise. The whole build/post
 * sequence runs under the send queue spinlock with interrupts disabled.
 */
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
			       struct mlx5r_umr_wqe *wqe, bool with_data)
{
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	unsigned int wqe_size = sizeof(struct mlx5r_umr_wqe);
	struct mlx5_wqe_ctrl_seg *ctrl;
	union {
		struct ib_cqe *ib_cqe;
		u64 wr_id;
	} id;
	void *cur_edge, *seg;
	unsigned long flags;
	unsigned int idx;
	int size, err;

	if (!with_data)
		wqe_size -= sizeof(struct mlx5_wqe_data_seg);

	if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
		return -EIO;

	spin_lock_irqsave(&qp->sq.lock, flags);

	err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
			      cpu_to_be32(mkey), false, false);
	if (WARN_ON(err))
		goto unlock;

	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;

	mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);

	/* The completion entry pointer travels as the 64-bit wr_id. */
	id.ib_cqe = cqe;
	mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
			 MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);

	mlx5r_ring_db(qp, 1, ctrl);

unlock:
	spin_unlock_irqrestore(&qp->sq.lock, flags);

	return err;
}
272
mlx5r_umr_done(struct ib_cq * cq,struct ib_wc * wc)273 static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
274 {
275 struct mlx5_ib_umr_context *context =
276 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
277
278 context->status = wc->status;
279 complete(&context->done);
280 }
281
mlx5r_umr_init_context(struct mlx5r_umr_context * context)282 static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
283 {
284 context->cqe.done = mlx5r_umr_done;
285 init_completion(&context->done);
286 }
287
/*
 * Post a UMR WQE and sleep until it completes.
 *
 * The mkey mask of @wqe is first checked against the device capabilities.
 * A slot of the umrc semaphore is held for the whole operation. Each attempt
 * looks at the umrc state under umrc->lock:
 *   - MLX5_UMR_STATE_ERR:     give up with -EFAULT;
 *   - MLX5_UMR_STATE_RECOVER: sleep 3-5 ms and look again;
 *   - otherwise:              post the WQE and wait for its completion.
 * A WQE completed with IB_WC_WR_FLUSH_ERR is posted again. Any other failed
 * completion triggers mlx5r_umr_recover() and the call returns -EFAULT.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
				    struct mlx5r_umr_wqe *wqe, bool with_data)
{
	struct umr_common *umrc = &dev->umrc;
	struct mlx5r_umr_context umr_context;
	int err;

	err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
	if (WARN_ON(err))
		return err;

	mlx5r_umr_init_context(&umr_context);

	down(&umrc->sem);
	for (;;) {
		mutex_lock(&umrc->lock);
		if (umrc->state == MLX5_UMR_STATE_ERR) {
			mutex_unlock(&umrc->lock);
			err = -EFAULT;
			goto release;
		}

		if (umrc->state == MLX5_UMR_STATE_RECOVER) {
			mutex_unlock(&umrc->lock);
			usleep_range(3000, 5000);
			continue;
		}

		err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
					  with_data);
		mutex_unlock(&umrc->lock);
		if (err) {
			mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
				     err);
			goto release;
		}

		wait_for_completion(&umr_context.done);

		/* err is 0 here, which is what a successful call returns. */
		if (umr_context.status == IB_WC_SUCCESS)
			goto release;

		/* Only a flushed WQE is resubmitted; anything else is fatal. */
		if (umr_context.status != IB_WC_WR_FLUSH_ERR)
			break;
	}

	WARN_ON_ONCE(1);
	mlx5_ib_warn(dev,
		     "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n",
		     umr_context.status);
	mutex_lock(&umrc->lock);
	err = mlx5r_umr_recover(dev);
	mutex_unlock(&umrc->lock);
	if (err)
		mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", err);
	/* This WQE failed regardless of how the recovery went. */
	err = -EFAULT;

release:
	up(&umrc->sem);
	return err;
}
349
350 /**
351 * mlx5r_umr_revoke_mr - Fence all DMA on the MR
352 * @mr: The MR to fence
353 *
354 * Upon return the NIC will not be doing any DMA to the pages under the MR,
355 * and any DMA in progress will be completed. Failure of this function
356 * indicates the HW has failed catastrophically.
357 */
mlx5r_umr_revoke_mr(struct mlx5_ib_mr * mr)358 int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
359 {
360 struct mlx5_ib_dev *dev = mr_to_mdev(mr);
361 struct mlx5r_umr_wqe wqe = {};
362
363 if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
364 return 0;
365
366 wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
367 wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
368 wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
369
370 MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
371 MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
372 MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
373 MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
374 mlx5_mkey_variant(mr->mmkey.key));
375
376 return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
377 }
378
mlx5r_umr_set_access_flags(struct mlx5_ib_dev * dev,struct mlx5_mkey_seg * seg,unsigned int access_flags)379 static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
380 struct mlx5_mkey_seg *seg,
381 unsigned int access_flags)
382 {
383 MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
384 MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
385 MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
386 MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
387 MLX5_SET(mkc, seg, lr, 1);
388 MLX5_SET(mkc, seg, relaxed_ordering_write,
389 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
390 MLX5_SET(mkc, seg, relaxed_ordering_read,
391 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
392 }
393
mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr * mr,struct ib_pd * pd,int access_flags)394 int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
395 int access_flags)
396 {
397 struct mlx5_ib_dev *dev = mr_to_mdev(mr);
398 struct mlx5r_umr_wqe wqe = {};
399 int err;
400
401 wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
402 wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
403 wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
404 wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
405
406 mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
407 MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
408 MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
409 MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
410 mlx5_mkey_variant(mr->mmkey.key));
411
412 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
413 if (err)
414 return err;
415
416 mr->access_flags = access_flags;
417 return 0;
418 }
419
/* Upper bound, in bytes, of the first XLT buffer allocation attempt. */
#define MLX5_MAX_UMR_CHUNK                                                     \
	((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT)
/* Size, in bytes, of the second attempt when the first one fails. */
#define MLX5_SPARE_UMR_CHUNK 0x10000
423
424 /*
425 * Allocate a temporary buffer to hold the per-page information to transfer to
426 * HW. For efficiency this should be as large as it can be, but buffer
427 * allocation failure is not allowed, so try smaller sizes.
428 */
mlx5r_umr_alloc_xlt(size_t * nents,size_t ent_size,gfp_t gfp_mask)429 static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
430 {
431 const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size;
432 size_t size;
433 void *res = NULL;
434
435 static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
436
437 /*
438 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
439 * allocation can't trigger any kind of reclaim.
440 */
441 might_sleep();
442
443 gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
444
445 /*
446 * If the system already has a suitable high order page then just use
447 * that, but don't try hard to create one. This max is about 1M, so a
448 * free x86 huge page will satisfy it.
449 */
450 size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
451 MLX5_MAX_UMR_CHUNK);
452 *nents = size / ent_size;
453 res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
454 get_order(size));
455 if (res)
456 return res;
457
458 if (size > MLX5_SPARE_UMR_CHUNK) {
459 size = MLX5_SPARE_UMR_CHUNK;
460 *nents = size / ent_size;
461 res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
462 get_order(size));
463 if (res)
464 return res;
465 }
466
467 *nents = PAGE_SIZE / ent_size;
468 res = (void *)__get_free_page(gfp_mask);
469 if (res)
470 return res;
471
472 mutex_lock(&xlt_emergency_page_mutex);
473 memset(xlt_emergency_page, 0, PAGE_SIZE);
474 return xlt_emergency_page;
475 }
476
/*
 * Give back a buffer obtained from mlx5r_umr_alloc_xlt(). Regular buffers
 * are freed by their order; the emergency page is kept and only its mutex
 * is released.
 */
static void mlx5r_umr_free_xlt(void *xlt, size_t length)
{
	if (xlt != xlt_emergency_page)
		free_pages((unsigned long)xlt, get_order(length));
	else
		mutex_unlock(&xlt_emergency_page_mutex);
}
486
mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev * dev,void * xlt,struct ib_sge * sg)487 static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
488 struct ib_sge *sg)
489 {
490 struct device *ddev = &dev->mdev->pdev->dev;
491
492 dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
493 mlx5r_umr_free_xlt(xlt, sg->length);
494 }
495
496 /*
497 * Create an XLT buffer ready for submission.
498 */
mlx5r_umr_create_xlt(struct mlx5_ib_dev * dev,struct ib_sge * sg,size_t nents,size_t ent_size,unsigned int flags)499 static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
500 size_t nents, size_t ent_size,
501 unsigned int flags)
502 {
503 struct device *ddev = &dev->mdev->pdev->dev;
504 dma_addr_t dma;
505 void *xlt;
506
507 xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
508 flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
509 GFP_KERNEL);
510 sg->length = nents * ent_size;
511 dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
512 if (dma_mapping_error(ddev, dma)) {
513 mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
514 mlx5r_umr_free_xlt(xlt, sg->length);
515 return NULL;
516 }
517 sg->addr = dma;
518 sg->lkey = dev->umrc.pd->local_dma_lkey;
519
520 return xlt;
521 }
522
523 static void
mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg * ctrl_seg,unsigned int flags,struct ib_sge * sg)524 mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
525 unsigned int flags, struct ib_sge *sg)
526 {
527 if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
528 /* fail if free */
529 ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
530 else
531 /* fail if not free */
532 ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
533 ctrl_seg->xlt_octowords =
534 cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
535 }
536
mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev * dev,struct mlx5_mkey_seg * mkey_seg,struct mlx5_ib_mr * mr,unsigned int page_shift)537 static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
538 struct mlx5_mkey_seg *mkey_seg,
539 struct mlx5_ib_mr *mr,
540 unsigned int page_shift)
541 {
542 mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
543 MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
544 MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
545 MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
546 MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
547 MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
548 MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
549 }
550
551 static void
mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg * data_seg,struct ib_sge * sg)552 mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
553 struct ib_sge *sg)
554 {
555 data_seg->byte_count = cpu_to_be32(sg->length);
556 data_seg->lkey = cpu_to_be32(sg->lkey);
557 data_seg->addr = cpu_to_be64(sg->addr);
558 }
559
/*
 * Store @offset, converted to octowords, in the control segment and enable
 * the translation offset. The low 16 bits go to xlt_offset and the remaining
 * bits to xlt_offset_47_16.
 */
static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
				    u64 offset)
{
	const u64 octo = mlx5r_umr_get_xlt_octo(offset);

	ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
	ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo >> 16);
	ctrl_seg->xlt_offset = cpu_to_be16(octo & 0xffff);
}
569
mlx5r_umr_final_update_xlt(struct mlx5_ib_dev * dev,struct mlx5r_umr_wqe * wqe,struct mlx5_ib_mr * mr,struct ib_sge * sg,unsigned int flags)570 static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
571 struct mlx5r_umr_wqe *wqe,
572 struct mlx5_ib_mr *mr, struct ib_sge *sg,
573 unsigned int flags)
574 {
575 bool update_pd_access, update_translation;
576
577 if (flags & MLX5_IB_UPD_XLT_ENABLE)
578 wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();
579
580 update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
581 flags & MLX5_IB_UPD_XLT_PD ||
582 flags & MLX5_IB_UPD_XLT_ACCESS;
583
584 if (update_pd_access) {
585 wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
586 wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
587 }
588
589 update_translation =
590 flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
591
592 if (update_translation) {
593 wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
594 if (!mr->ibmr.length)
595 MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
596 }
597
598 wqe->ctrl_seg.xlt_octowords =
599 cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
600 wqe->data_seg.byte_count = cpu_to_be32(sg->length);
601 }
602
603 /*
604 * Send the DMA list to the HW for a normal MR using UMR.
605 * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
606 * flag may be used.
607 */
mlx5r_umr_update_mr_pas(struct mlx5_ib_mr * mr,unsigned int flags)608 int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
609 {
610 struct mlx5_ib_dev *dev = mr_to_mdev(mr);
611 struct device *ddev = &dev->mdev->pdev->dev;
612 struct mlx5r_umr_wqe wqe = {};
613 struct ib_block_iter biter;
614 struct mlx5_mtt *cur_mtt;
615 size_t orig_sg_length;
616 struct mlx5_mtt *mtt;
617 size_t final_size;
618 struct ib_sge sg;
619 u64 offset = 0;
620 int err = 0;
621
622 if (WARN_ON(mr->umem->is_odp))
623 return -EINVAL;
624
625 mtt = mlx5r_umr_create_xlt(
626 dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
627 sizeof(*mtt), flags);
628 if (!mtt)
629 return -ENOMEM;
630
631 orig_sg_length = sg.length;
632
633 mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
634 mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
635 mr->page_shift);
636 mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
637
638 cur_mtt = mtt;
639 rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter,
640 mr->umem->sgt_append.sgt.nents,
641 BIT(mr->page_shift)) {
642 if (cur_mtt == (void *)mtt + sg.length) {
643 dma_sync_single_for_device(ddev, sg.addr, sg.length,
644 DMA_TO_DEVICE);
645
646 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
647 true);
648 if (err)
649 goto err;
650 dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
651 DMA_TO_DEVICE);
652 offset += sg.length;
653 mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
654
655 cur_mtt = mtt;
656 }
657
658 cur_mtt->ptag =
659 cpu_to_be64(rdma_block_iter_dma_address(&biter) |
660 MLX5_IB_MTT_PRESENT);
661
662 if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
663 cur_mtt->ptag = 0;
664
665 cur_mtt++;
666 }
667
668 final_size = (void *)cur_mtt - (void *)mtt;
669 sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
670 memset(cur_mtt, 0, sg.length - final_size);
671 mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
672
673 dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
674 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
675
676 err:
677 sg.length = orig_sg_length;
678 mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
679 return err;
680 }
681
umr_can_use_indirect_mkey(struct mlx5_ib_dev * dev)682 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
683 {
684 return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
685 }
686
/*
 * Update a range of translation entries of an ODP MR through UMR.
 *
 * @idx and @npages give the range in entries; an entry is a struct mlx5_klm
 * when MLX5_IB_UPD_XLT_INDIRECT is set in @flags and a struct mlx5_mtt
 * otherwise. The range is widened to MLX5_UMR_MTT_ALIGNMENT boundaries and
 * sent in chunks of whatever size the temporary buffer could be allocated
 * with. The last chunk carries the final mkey mask.
 *
 * Returns 0 on success, -EPERM if indirect mkeys are requested but disabled
 * on the device, -EINVAL for a non-ODP MR, -ENOMEM if the buffer cannot be
 * created, or the error of a failed UMR post.
 */
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
			 int page_shift, int flags)
{
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			       ? sizeof(struct mlx5_klm)
			       : sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	const int page_mask = page_align - 1;
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	struct mlx5r_umr_wqe wqe = {};
	size_t pages_mapped = 0;
	size_t pages_to_map;
	size_t orig_sg_length;
	size_t size_to_map;
	size_t pages_iter;
	struct ib_sge sg;
	int err = 0;
	void *xlt;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	if (WARN_ON(!mr->umem->is_odp))
		return -EINVAL;

	/*
	 * UMR copies entries in units of MLX5_UMR_MTT_ALIGNMENT bytes, so
	 * pull the start back to an aligned index and grow the count by the
	 * same amount. Both statements are no-ops for an aligned idx.
	 */
	npages += idx & page_mask;
	idx &= ~page_mask;
	pages_to_map = ALIGN(npages, page_align);

	xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
	if (!xlt)
		return -ENOMEM;

	/* Entries per chunk, as dictated by the buffer actually obtained. */
	pages_iter = sg.length / desc_size;
	orig_sg_length = sg.length;

	if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
		struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
		size_t max_pages = ib_umem_odp_num_pages(odp) - idx;

		/* Don't run past the end of the umem. */
		pages_to_map = min_t(size_t, pages_to_map, max_pages);
	}

	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);

	while (pages_mapped < pages_to_map && !err) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		size_to_map = npages * desc_size;

		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
					DMA_TO_DEVICE);
		mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
		dma_sync_single_for_device(ddev, sg.addr, sg.length,
					   DMA_TO_DEVICE);
		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);

		/* Last chunk: add the final mask and the real length. */
		if (pages_mapped + pages_iter >= pages_to_map)
			mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
		mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
		err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);

		pages_mapped += pages_iter;
		idx += pages_iter;
	}

	/* Unmap and free with the length the buffer was created with. */
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
	return err;
}
762