// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from the last_key. Set last_key to -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case IB_MR_TYPE_DMA:
		return 0;

	case IB_MR_TYPE_USER:
	case IB_MR_TYPE_MEM_REG:
		if (iova < mr->ibmr.iova || length > mr->ibmr.length ||
		    iova > mr->ibmr.iova + mr->ibmr.length - length)
			return -EFAULT;
		return 0;

	default:
		pr_warn("%s: mr type (%d) not supported\n",
			__func__, mr->type);
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

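/* Initialize the keys for a new MR. The pool element index occupies the
 * upper 24 bits of the lkey and a random 8 bit key value occupies the low
 * byte, so a key always maps back to its pool element. The rkey is only
 * set when the caller asked for some form of remote access.
 */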
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* set ibmr->l/rkey and also copy into private l/rkey
	 * for user MRs these will always be the same
	 * for cases where caller 'owns' the key portion
	 * they may be different until REG_MR WQE is executed.
	 */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

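/* Allocate the two-level buffer map for an MR: mr->map is an array of
 * pointers to map blocks, each block holding RXE_BUF_PER_MAP physical
 * buffer descriptors. num_buf is rounded up to a whole number of blocks.
 */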
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
	mr->map = NULL;
err1:
	return -ENOMEM;
}

void rxe_mr_init_dma(int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_DMA;
}

int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;

	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
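	/* Walk the pinned umem one page at a time, recording each page's
	 * kernel virtual address and size in the next physical buffer
	 * slot, and moving to the next map block every RXE_BUF_PER_MAP
	 * entries.
	 */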
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_release_umem;
			}
			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->umem = umem;
	mr->access = access;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_USER;

	return 0;

err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for FMRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = IB_MR_TYPE_MEM_REG;

	return 0;

err1:
	return err;
}

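/* Translate an iova within the MR into a (map index, buffer index, offset)
 * triple. When every buffer is the same power-of-two size (page_shift is
 * set) this is just shifts and masks; otherwise walk the buffer list,
 * subtracting each buffer's size until the remaining offset fits.
 */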
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->ibmr.iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

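/* Return a kernel virtual address for iova if the requested range lies
 * entirely within one physical buffer of a valid MR, or NULL on any
 * failure. DMA MRs (no map) pass the iova through unchanged.
 */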
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (addr, addr+length-1) to or from
 * an mr object starting at iova.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;

	if (length == 0)
		return 0;

	if (mr->type == IB_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

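	/* Copy up to one physical buffer per iteration, advancing to the
	 * next buffer (and next map block when needed) until length bytes
	 * have been transferred in the requested direction.
	 */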
	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum rxe_mr_copy_dir	dir)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

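	/* Walk the sg list: whenever the current sge is exhausted, drop its
	 * MR reference and move to the next sge, looking up its MR before
	 * copying. Zero-length sges are skipped.
	 */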
	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_put(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_put(mr);

	return 0;

err2:
	if (mr)
		rxe_put(mr);
err1:
	return err;
}

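/* Advance the dma descriptor's position in its sg list by length bytes
 * without copying anything, updating the current sge, offset and residual
 * count.
 */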
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_put(mr);
		mr = NULL;
	}

	return mr;
}

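/* Invalidate an MR identified by its rkey (or lkey if no rkey is set),
 * typically in response to a local invalidate or send with invalidate
 * operation. Only fast-reg (IB_MR_TYPE_MEM_REG) MRs with no bound memory
 * windows may be invalidated; on success the MR returns to the FREE state.
 */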
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		pr_err("%s: No MR for key %#x\n", __func__, key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_put(mr);
err:
	return ret;
}

/* user can (re)register fast MR by executing a REG_MR WQE.
 * user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to maintain
 * the ib mr keys in sync with rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* user can only register MR in free state */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		pr_warn("%s: mr->lkey = 0x%x not free\n",
			__func__, mr->lkey);
		return -EINVAL;
	}

	/* user can only register mr with qp in same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		pr_warn("%s: qp->pd and mr->pd don't match\n",
			__func__);
		return -EINVAL;
	}

	/* user is only allowed to change key portion of l/rkey */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
			__func__, key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}

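/* Deregister an MR via the ib_dereg_mr verb. Per IBA 10.6.7.2.6 an MR
 * with memory windows still bound to it cannot be deregistered.
 */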
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	/* See IBA 10.6.7.2.6 */
	if (atomic_read(&mr->num_mw) > 0)
		return -EINVAL;

	rxe_cleanup(mr);

	return 0;
}

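/* Pool cleanup callback for an MR: drop the PD reference, release the
 * pinned umem (if any) and free the two-level buffer map.
 */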
void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
	int i;

	rxe_put(mr_pd(mr));
	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}