// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different than the last_key. Set last_key to -1
 * if this is the first key for an MR or MW
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

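/* Check that the byte range [iova, iova + length - 1] falls entirely
 * within the region described by mr. DMA MRs cover all of memory and
 * always pass.
 */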
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case IB_MR_TYPE_DMA:
		return 0;

	case IB_MR_TYPE_USER:
	case IB_MR_TYPE_MEM_REG:
		if (iova < mr->ibmr.iova || length > mr->ibmr.length ||
		    iova > mr->ibmr.iova + mr->ibmr.length - length)
			return -EFAULT;
		return 0;

	default:
		pr_warn("%s: mr type (%d) not supported\n",
			__func__, mr->type);
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

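/* Common MR setup: build the lkey from the pool index (upper 24 bits)
 * and a random 8 bit key (low byte), mirror it into the rkey only when
 * remote access was requested, and leave the MR invalid until it is
 * registered.
 */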
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* set ibmr->l/rkey and also copy into private l/rkey.
	 * For user MRs these will always be the same.
	 * For cases where the caller 'owns' the key portion
	 * they may differ until the REG_MR WQE is executed.
	 */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

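/* Allocate the two-level page map: an array of num_map pointers, each
 * to a rxe_map holding RXE_BUF_PER_MAP physical buffer descriptors,
 * enough to describe num_buf buffers in total.
 */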
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
	mr->map = NULL;
err1:
	return -ENOMEM;
}

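/* DMA MRs describe no pages at all; kernel virtual addresses are used
 * directly, so only the keys, access flags and state need to be set.
 */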
void rxe_mr_init_dma(int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_DMA;
}

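/* Register a user memory region: pin the user pages with ib_umem_get()
 * and record the kernel virtual address of each page in the map table.
 */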
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct sg_page_iter	sg_iter;
	int			num_buf;
	void			*vaddr;
	int err;

	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
				__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

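	/* walk the pinned pages and record each page's kernel virtual
	 * address, moving to the next map entry every RXE_BUF_PER_MAP
	 * buffers
	 */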
	num_buf			= 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
						__func__);
				err = -ENOMEM;
				goto err_release_umem;
			}
			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->umem = umem;
	mr->access = access;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_USER;

	return 0;

err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

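/* Set up a fast registration MR (used with the REG_MR work request).
 * The map table is sized for max_pages but left unpopulated, and the
 * MR stays in the FREE state until a REG_MR WQE makes it valid.
 */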
int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for FMRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = IB_MR_TYPE_MEM_REG;

	return 0;

err1:
	return err;
}

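/* Translate an iova within mr into a (map index, buffer index, offset
 * into buffer) triple.
 */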
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->ibmr.iova + mr->offset;
	int			map_index;
	int			buf_index;
	u64			length;

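	/* if the MR is built from fixed-size pages the indices can be
	 * computed with shifts and masks; otherwise walk the buffers,
	 * which may have differing sizes, until the offset is consumed
	 */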
	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

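/* Return the kernel virtual address backing iova in mr, or NULL if the
 * MR is not valid, the range check fails, or the requested length
 * crosses a buffer boundary.
 */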
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

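	/* DMA MRs have no map table; the iova already is a kernel
	 * virtual address
	 */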
	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * an mr object starting at iova.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int			err;
	int			bytes;
	u8			*va;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			m;
	int			i;
	size_t			offset;

	if (length == 0)
		return 0;

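	/* for a DMA MR the iova is itself a kernel virtual address, so
	 * a single memcpy covers the whole transfer
	 */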
	if (mr->type == IB_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf	= map[0]->buf + i;

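	/* copy buffer by buffer, moving on to the next map entry once
	 * the current one runs out of buffers
	 */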
	while (length > 0) {
		u8 *src, *dest;

		va	= (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes	= buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length	-= bytes;
		addr	+= bytes;

		offset	= 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum rxe_mr_copy_dir	dir)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mr		*mr	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

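	/* consume the sge list, dropping the reference on the current
	 * MR and looking up the next one each time an sge is exhausted
	 */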
	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_put(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset	+= bytes;
			resid	-= bytes;
			length	-= bytes;
			addr	+= bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	if (mr)
		rxe_put(mr);

	return 0;

err2:
	if (mr)
		rxe_put(mr);
err1:
	return err;
}

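/* Advance the dma descriptor state (current sge, offset, resid) by
 * length bytes without copying any data.
 */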
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset	+= bytes;
		resid	-= bytes;
		length	-= bytes;
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_put(mr);
		mr = NULL;
	}

	return mr;
}

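/* Invalidate the MR identified by key. Only fast registration MRs with
 * no memory windows bound to them can be invalidated; on success the
 * MR returns to the FREE state.
 */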
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		pr_err("%s: No MR for key %#x\n", __func__, key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_put(mr);
err:
	return ret;
}

/* user can (re)register fast MR by executing a REG_MR WQE.
 * user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to maintain
 * the ib mr keys in sync with rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* user can only register MR in free state */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		pr_warn("%s: mr->lkey = 0x%x not free\n",
			__func__, mr->lkey);
		return -EINVAL;
	}

	/* user can only register mr with qp in same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		pr_warn("%s: qp->pd and mr->pd don't match\n",
			__func__);
		return -EINVAL;
	}

	/* user is only allowed to change key portion of l/rkey */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
			__func__, key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}

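/* Deregister an MR. Deregistration fails while memory windows are
 * still bound to the MR.
 */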
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	/* See IBA 10.6.7.2.6 */
	if (atomic_read(&mr->num_mw) > 0)
		return -EINVAL;

	rxe_cleanup(mr);

	return 0;
}

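/* Pool cleanup callback for MR objects: drop the PD reference, release
 * the pinned umem and free the map table.
 */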
void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
	int i;

	rxe_put(mr_pd(mr));
	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}