1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3
4 #include <linux/vhost_types.h>
5 #include <linux/vdpa.h>
6 #include <linux/gcd.h>
7 #include <linux/string.h>
8 #include <linux/mlx5/qp.h>
9 #include "mlx5_vdpa.h"
10
/*
 * DIV_ROUND_UP where the divisor is a power of 2 given by its log base 2
 * value, i.e. the result is ceil(_n / 2^_s) as a u64.
 *
 * The rounding addend is built from a 64-bit one so that shift counts of 31
 * and above do not overflow a plain int. Each argument is evaluated once.
 */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = (_s); \
	u64 _res; \
	_res = (((_n) + (1ULL << __s) - 1) >> __s); \
	_res; \
})
19
/*
 * Number of translation octowords needed for a region of @len bytes that is
 * described in units of 2^@page_shift bytes. @len is first rounded up to a
 * whole number of units; two units share one octoword, so the unit count is
 * halved, rounding up.
 */
static int get_octo_len(u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	int npages = ALIGN(len, page_size) >> page_shift;

	return (npages + 1) / 2;
}
28
mlx5_set_access_mode(void * mkc,int mode)29 static void mlx5_set_access_mode(void *mkc, int mode)
30 {
31 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
32 MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
33 }
34
/*
 * Fill the translation table @mtt from the DMA-mapped scatterlist of @mr.
 *
 * Every DMA-mapped entry (there are mr->nent of them) is cut into chunks of
 * 2^mr->log_size bytes and the big-endian DMA address of each chunk is
 * appended to @mtt. At most mr->nsg addresses are written in total.
 */
static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
	int remaining = mr->nsg;
	struct scatterlist *sg;
	u64 dma_addr;
	u64 dma_len;
	int idx = 0;
	int i;

	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
		dma_addr = sg_dma_address(sg);
		dma_len = sg_dma_len(sg);

		/* Stop early once all mr->nsg table slots have been used. */
		while (remaining && dma_len) {
			mtt[idx++] = cpu_to_be64(dma_addr);
			remaining--;
			dma_addr += BIT(mr->log_size);
			dma_len -= BIT(mr->log_size);
		}
	}
}
51
create_direct_mr(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_direct_mr * mr)52 static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
53 {
54 int inlen;
55 void *mkc;
56 void *in;
57 int err;
58
59 inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
60 in = kvzalloc(inlen, GFP_KERNEL);
61 if (!in)
62 return -ENOMEM;
63
64 MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
65 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
66 MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
67 MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
68 mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
69 MLX5_SET(mkc, mkc, qpn, 0xffffff);
70 MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
71 MLX5_SET64(mkc, mkc, start_addr, mr->offset);
72 MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
73 MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
74 MLX5_SET(mkc, mkc, translations_octword_size,
75 get_octo_len(mr->end - mr->start, mr->log_size));
76 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
77 get_octo_len(mr->end - mr->start, mr->log_size));
78 populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
79 err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
80 kvfree(in);
81 if (err) {
82 mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
83 return err;
84 }
85
86 return 0;
87 }
88
destroy_direct_mr(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_direct_mr * mr)89 static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
90 {
91 mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
92 }
93
map_start(struct vhost_iotlb_map * map,struct mlx5_vdpa_direct_mr * mr)94 static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
95 {
96 return max_t(u64, map->start, mr->start);
97 }
98
map_end(struct vhost_iotlb_map * map,struct mlx5_vdpa_direct_mr * mr)99 static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
100 {
101 return min_t(u64, map->last + 1, mr->end);
102 }
103
maplen(struct vhost_iotlb_map * map,struct mlx5_vdpa_direct_mr * mr)104 static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
105 {
106 return map_end(map, mr) - map_start(map, mr);
107 }
108
/*
 * All-ones sentinels returned by indir_start_addr() and indir_len() when the
 * indirect key has no direct MRs to describe.
 */
#define MLX5_VDPA_INVALID_START_ADDR (~(u64)0)
#define MLX5_VDPA_INVALID_LEN (~(u64)0)
111
indir_start_addr(struct mlx5_vdpa_mr * mkey)112 static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
113 {
114 struct mlx5_vdpa_direct_mr *s;
115
116 s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
117 if (!s)
118 return MLX5_VDPA_INVALID_START_ADDR;
119
120 return s->start;
121 }
122
indir_len(struct mlx5_vdpa_mr * mkey)123 static u64 indir_len(struct mlx5_vdpa_mr *mkey)
124 {
125 struct mlx5_vdpa_direct_mr *s;
126 struct mlx5_vdpa_direct_mr *e;
127
128 s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
129 if (!s)
130 return MLX5_VDPA_INVALID_LEN;
131
132 e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);
133
134 return e->end - s->start;
135 }
136
/*
 * Upper bound, in bytes, on the range handled as a single unit in this file:
 * direct MRs are split so that none is larger than this, and holes between
 * maps are accounted for in units of this size.
 */
#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE (1UL << (LOG_MAX_KLM_SIZE))
139
klm_bcount(u64 size)140 static u32 klm_bcount(u64 size)
141 {
142 return (u32)size;
143 }
144
fill_indir(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_mr * mkey,void * in)145 static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
146 {
147 struct mlx5_vdpa_direct_mr *dmr;
148 struct mlx5_klm *klmarr;
149 struct mlx5_klm *klm;
150 bool first = true;
151 u64 preve;
152 int i;
153
154 klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
155 i = 0;
156 list_for_each_entry(dmr, &mkey->head, list) {
157 again:
158 klm = &klmarr[i++];
159 if (first) {
160 preve = dmr->start;
161 first = false;
162 }
163
164 if (preve == dmr->start) {
165 klm->key = cpu_to_be32(dmr->mr);
166 klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
167 preve = dmr->end;
168 } else {
169 klm->key = cpu_to_be32(mvdev->res.null_mkey);
170 klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
171 preve = dmr->start;
172 goto again;
173 }
174 }
175 }
176
/*
 * Size in bytes of a KLM array holding @nklms entries: the entry count is
 * rounded up to a multiple of four and each entry takes 16 bytes.
 */
static int klm_byte_size(int nklms)
{
	int padded = ALIGN(nklms, 4);

	return 16 * padded;
}
181
create_indirect_key(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_mr * mr)182 static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
183 {
184 int inlen;
185 void *mkc;
186 void *in;
187 int err;
188 u64 start;
189 u64 len;
190
191 start = indir_start_addr(mr);
192 len = indir_len(mr);
193 if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
194 return -EINVAL;
195
196 inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
197 in = kzalloc(inlen, GFP_KERNEL);
198 if (!in)
199 return -ENOMEM;
200
201 MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
202 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
203 MLX5_SET(mkc, mkc, lw, 1);
204 MLX5_SET(mkc, mkc, lr, 1);
205 mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
206 MLX5_SET(mkc, mkc, qpn, 0xffffff);
207 MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
208 MLX5_SET64(mkc, mkc, start_addr, start);
209 MLX5_SET64(mkc, mkc, len, len);
210 MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
211 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
212 fill_indir(mvdev, mr, in);
213 err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
214 kfree(in);
215 return err;
216 }
217
destroy_indirect_key(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_mr * mkey)218 static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
219 {
220 mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
221 }
222
map_direct_mr(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_direct_mr * mr,struct vhost_iotlb * iotlb)223 static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
224 struct vhost_iotlb *iotlb)
225 {
226 struct vhost_iotlb_map *map;
227 unsigned long lgcd = 0;
228 int log_entity_size;
229 unsigned long size;
230 u64 start = 0;
231 int err;
232 struct page *pg;
233 unsigned int nsg;
234 int sglen;
235 u64 pa;
236 u64 paend;
237 struct scatterlist *sg;
238 struct device *dma = mvdev->vdev.dma_dev;
239
240 for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
241 map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
242 size = maplen(map, mr);
243 lgcd = gcd(lgcd, size);
244 start += size;
245 }
246 log_entity_size = ilog2(lgcd);
247
248 sglen = 1 << log_entity_size;
249 nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);
250
251 err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
252 if (err)
253 return err;
254
255 sg = mr->sg_head.sgl;
256 for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
257 map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
258 paend = map->addr + maplen(map, mr);
259 for (pa = map->addr; pa < paend; pa += sglen) {
260 pg = pfn_to_page(__phys_to_pfn(pa));
261 if (!sg) {
262 mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
263 map->start, map->last + 1);
264 err = -ENOMEM;
265 goto err_map;
266 }
267 sg_set_page(sg, pg, sglen, 0);
268 sg = sg_next(sg);
269 if (!sg)
270 goto done;
271 }
272 }
273 done:
274 mr->log_size = log_entity_size;
275 mr->nsg = nsg;
276 mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
277 if (!mr->nent) {
278 err = -ENOMEM;
279 goto err_map;
280 }
281
282 err = create_direct_mr(mvdev, mr);
283 if (err)
284 goto err_direct;
285
286 return 0;
287
288 err_direct:
289 dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
290 err_map:
291 sg_free_table(&mr->sg_head);
292 return err;
293 }
294
unmap_direct_mr(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_direct_mr * mr)295 static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
296 {
297 struct device *dma = mvdev->vdev.dma_dev;
298
299 destroy_direct_mr(mvdev, mr);
300 dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
301 sg_free_table(&mr->sg_head);
302 }
303
/*
 * Create the direct MRs needed to cover [@start, @start + @size) with
 * permissions @perm and append them to mvdev->mr.head.
 *
 * The range is split into pieces of at most MAX_KLM_SIZE bytes, one direct
 * MR per piece. New MRs are collected on a private list and only spliced
 * onto mr->head once all of them exist. num_directs and num_klms are bumped
 * once per MR.
 *
 * Returns 0 on success. On failure every MR created by this call is
 * destroyed, the counters are rolled back, and a negative errno is returned.
 * MRs already on mr->head from earlier calls are left untouched for the
 * caller to clean up.
 */
static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm,
			    struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	LIST_HEAD(tmp);
	u64 st;
	u64 sz;
	int err;

	st = start;
	while (size) {
		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
		if (!dmr) {
			err = -ENOMEM;
			goto err_alloc;
		}

		dmr->start = st;
		dmr->end = st + sz;
		dmr->perm = perm;
		err = map_direct_mr(mvdev, dmr, iotlb);
		if (err) {
			kfree(dmr);
			goto err_alloc;
		}

		list_add_tail(&dmr->list, &tmp);
		size -= sz;
		mr->num_directs++;
		mr->num_klms++;
		st += sz;
	}
	list_splice_tail(&tmp, &mr->head);
	return 0;

err_alloc:
	/*
	 * The MRs created so far are still on the private list; they were
	 * never spliced onto mr->head, so they must be released from here
	 * or they would be leaked.
	 */
	list_for_each_entry_safe(dmr, n, &tmp, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
		mr->num_directs--;
		mr->num_klms--;
	}
	return err;
}
352
353 /* The iotlb pointer contains a list of maps. Go over the maps, possibly
354 * merging mergeable maps, and create direct memory keys that provide the
355 * device access to memory. The direct mkeys are then referred to by the
356 * indirect memory key that provides access to the enitre address space given
357 * by iotlb.
358 */
create_user_mr(struct mlx5_vdpa_dev * mvdev,struct vhost_iotlb * iotlb)359 static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
360 {
361 struct mlx5_vdpa_mr *mr = &mvdev->mr;
362 struct mlx5_vdpa_direct_mr *dmr;
363 struct mlx5_vdpa_direct_mr *n;
364 struct vhost_iotlb_map *map;
365 u32 pperm = U16_MAX;
366 u64 last = U64_MAX;
367 u64 ps = U64_MAX;
368 u64 pe = U64_MAX;
369 u64 start = 0;
370 int err = 0;
371 int nnuls;
372
373 INIT_LIST_HEAD(&mr->head);
374 for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
375 map = vhost_iotlb_itree_next(map, start, last)) {
376 start = map->start;
377 if (pe == map->start && pperm == map->perm) {
378 pe = map->last + 1;
379 } else {
380 if (ps != U64_MAX) {
381 if (pe < map->start) {
382 /* We have a hole in the map. Check how
383 * many null keys are required to fill it.
384 */
385 nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
386 LOG_MAX_KLM_SIZE);
387 mr->num_klms += nnuls;
388 }
389 err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
390 if (err)
391 goto err_chain;
392 }
393 ps = map->start;
394 pe = map->last + 1;
395 pperm = map->perm;
396 }
397 }
398 err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
399 if (err)
400 goto err_chain;
401
402 /* Create the memory key that defines the guests's address space. This
403 * memory key refers to the direct keys that contain the MTT
404 * translations
405 */
406 err = create_indirect_key(mvdev, mr);
407 if (err)
408 goto err_chain;
409
410 mr->user_mr = true;
411 return 0;
412
413 err_chain:
414 list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
415 list_del_init(&dmr->list);
416 unmap_direct_mr(mvdev, dmr);
417 kfree(dmr);
418 }
419 return err;
420 }
421
create_dma_mr(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_mr * mr)422 static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
423 {
424 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
425 void *mkc;
426 u32 *in;
427 int err;
428
429 in = kzalloc(inlen, GFP_KERNEL);
430 if (!in)
431 return -ENOMEM;
432
433 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
434
435 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
436 MLX5_SET(mkc, mkc, length64, 1);
437 MLX5_SET(mkc, mkc, lw, 1);
438 MLX5_SET(mkc, mkc, lr, 1);
439 MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
440 MLX5_SET(mkc, mkc, qpn, 0xffffff);
441
442 err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
443 if (!err)
444 mr->user_mr = false;
445
446 kfree(in);
447 return err;
448 }
449
destroy_dma_mr(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_mr * mr)450 static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
451 {
452 mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
453 }
454
dup_iotlb(struct mlx5_vdpa_dev * mvdev,struct vhost_iotlb * src)455 static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
456 {
457 struct vhost_iotlb_map *map;
458 u64 start = 0, last = ULLONG_MAX;
459 int err;
460
461 if (!src) {
462 err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
463 return err;
464 }
465
466 for (map = vhost_iotlb_itree_first(src, start, last); map;
467 map = vhost_iotlb_itree_next(map, start, last)) {
468 err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
469 map->addr, map->perm);
470 if (err)
471 return err;
472 }
473 return 0;
474 }
475
prune_iotlb(struct mlx5_vdpa_dev * mvdev)476 static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
477 {
478 vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX);
479 }
480
destroy_user_mr(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_mr * mr)481 static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
482 {
483 struct mlx5_vdpa_direct_mr *dmr;
484 struct mlx5_vdpa_direct_mr *n;
485
486 destroy_indirect_key(mvdev, mr);
487 list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
488 list_del_init(&dmr->list);
489 unmap_direct_mr(mvdev, dmr);
490 kfree(dmr);
491 }
492 }
493
mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev * mvdev)494 void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
495 {
496 struct mlx5_vdpa_mr *mr = &mvdev->mr;
497
498 mutex_lock(&mr->mkey_mtx);
499 if (!mr->initialized)
500 goto out;
501
502 prune_iotlb(mvdev);
503 if (mr->user_mr)
504 destroy_user_mr(mvdev, mr);
505 else
506 destroy_dma_mr(mvdev, mr);
507
508 memset(mr, 0, sizeof(*mr));
509 mr->initialized = false;
510 out:
511 mutex_unlock(&mr->mkey_mtx);
512 }
513
_mlx5_vdpa_create_mr(struct mlx5_vdpa_dev * mvdev,struct vhost_iotlb * iotlb)514 static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
515 {
516 struct mlx5_vdpa_mr *mr = &mvdev->mr;
517 int err;
518
519 if (mr->initialized)
520 return 0;
521
522 if (iotlb)
523 err = create_user_mr(mvdev, iotlb);
524 else
525 err = create_dma_mr(mvdev, mr);
526
527 if (err)
528 return err;
529
530 err = dup_iotlb(mvdev, iotlb);
531 if (err)
532 goto out_err;
533
534 mr->initialized = true;
535 return 0;
536
537 out_err:
538 if (iotlb)
539 destroy_user_mr(mvdev, mr);
540 else
541 destroy_dma_mr(mvdev, mr);
542
543 return err;
544 }
545
mlx5_vdpa_create_mr(struct mlx5_vdpa_dev * mvdev,struct vhost_iotlb * iotlb)546 int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
547 {
548 int err;
549
550 mutex_lock(&mvdev->mr.mkey_mtx);
551 err = _mlx5_vdpa_create_mr(mvdev, iotlb);
552 mutex_unlock(&mvdev->mr.mkey_mtx);
553 return err;
554 }
555
/*
 * Handle a new memory map for the device.
 *
 * If a memory region already exists, nothing is created here; *@change_map
 * is set to true so the caller knows the existing mapping has to be
 * replaced. Otherwise *@change_map is false and the memory region is created
 * from @iotlb.
 *
 * Returns 0, or the error from creating the memory region.
 */
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
			     bool *change_map)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	int err = 0;

	*change_map = false;
	mutex_lock(&mr->mkey_mtx);
	if (mr->initialized) {
		mlx5_vdpa_info(mvdev, "memory map update\n");
		*change_map = true;
	} else {
		err = _mlx5_vdpa_create_mr(mvdev, iotlb);
	}
	mutex_unlock(&mr->mkey_mtx);

	return err;
}
574