1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2020 Google LLC
4 * Author: Quentin Perret <qperret@google.com>
5 */
6
7 #include <linux/kvm_host.h>
8 #include <asm/kvm_emulate.h>
9 #include <asm/kvm_hyp.h>
10 #include <asm/kvm_mmu.h>
11 #include <asm/kvm_pgtable.h>
12 #include <asm/kvm_pkvm.h>
13 #include <asm/stage2_pgtable.h>
14
15 #include <hyp/fault.h>
16
17 #include <nvhe/gfp.h>
18 #include <nvhe/memory.h>
19 #include <nvhe/mem_protect.h>
20 #include <nvhe/mm.h>
21
22 #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)
23
24 struct host_mmu host_mmu;
25
26 static struct hyp_pool host_s2_pool;
27
28 static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
29 #define current_vm (*this_cpu_ptr(&__current_vm))
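/*
 * current_vm is only meaningful between guest_lock_component() and
 * guest_unlock_component(): the guest_s2_* mm_ops below rely on it to find
 * the per-VM pool, so they must run with the corresponding vm->lock held.
 */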
30
31 static void guest_lock_component(struct pkvm_hyp_vm *vm)
32 {
33 hyp_spin_lock(&vm->lock);
34 current_vm = vm;
35 }
36
37 static void guest_unlock_component(struct pkvm_hyp_vm *vm)
38 {
39 current_vm = NULL;
40 hyp_spin_unlock(&vm->lock);
41 }
42
43 static void host_lock_component(void)
44 {
45 hyp_spin_lock(&host_mmu.lock);
46 }
47
48 static void host_unlock_component(void)
49 {
50 hyp_spin_unlock(&host_mmu.lock);
51 }
52
53 static void hyp_lock_component(void)
54 {
55 hyp_spin_lock(&pkvm_pgd_lock);
56 }
57
58 static void hyp_unlock_component(void)
59 {
60 hyp_spin_unlock(&pkvm_pgd_lock);
61 }
62
63 static void *host_s2_zalloc_pages_exact(size_t size)
64 {
65 void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
66
67 hyp_split_page(hyp_virt_to_page(addr));
68
69 /*
70 * The size of concatenated PGDs is always a power-of-two multiple of PAGE_SIZE,
71 * so there should be no need to free any of the tail pages to make the
72 * allocation exact.
73 */
74 WARN_ON(size != (PAGE_SIZE << get_order(size)));
75
76 return addr;
77 }
78
79 static void *host_s2_zalloc_page(void *pool)
80 {
81 return hyp_alloc_pages(pool, 0);
82 }
83
84 static void host_s2_get_page(void *addr)
85 {
86 hyp_get_page(&host_s2_pool, addr);
87 }
88
89 static void host_s2_put_page(void *addr)
90 {
91 hyp_put_page(&host_s2_pool, addr);
92 }
93
94 static void host_s2_free_unlinked_table(void *addr, u32 level)
95 {
96 kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
97 }
98
99 static int prepare_s2_pool(void *pgt_pool_base)
100 {
101 unsigned long nr_pages, pfn;
102 int ret;
103
104 pfn = hyp_virt_to_pfn(pgt_pool_base);
105 nr_pages = host_s2_pgtable_pages();
106 ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
107 if (ret)
108 return ret;
109
110 host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
111 .zalloc_pages_exact = host_s2_zalloc_pages_exact,
112 .zalloc_page = host_s2_zalloc_page,
113 .free_unlinked_table = host_s2_free_unlinked_table,
114 .phys_to_virt = hyp_phys_to_virt,
115 .virt_to_phys = hyp_virt_to_phys,
116 .page_count = hyp_page_count,
117 .get_page = host_s2_get_page,
118 .put_page = host_s2_put_page,
119 };
120
121 return 0;
122 }
123
124 static void prepare_host_vtcr(void)
125 {
126 u32 parange, phys_shift;
127
128 /* The host stage 2 is id-mapped, so use parange for T0SZ */
129 parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
130 phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
131
132 host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
133 id_aa64mmfr1_el1_sys_val, phys_shift);
134 }
135
136 static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);
137
138 int kvm_host_prepare_stage2(void *pgt_pool_base)
139 {
140 struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
141 int ret;
142
143 prepare_host_vtcr();
144 hyp_spin_lock_init(&host_mmu.lock);
145 mmu->arch = &host_mmu.arch;
146
147 ret = prepare_s2_pool(pgt_pool_base);
148 if (ret)
149 return ret;
150
151 ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
152 &host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
153 host_stage2_force_pte_cb);
154 if (ret)
155 return ret;
156
157 mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
158 mmu->pgt = &host_mmu.pgt;
159 atomic64_set(&mmu->vmid.id, 0);
160
161 return 0;
162 }
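/*
 * Note that the host stage-2 prepared above is not installed here: it only
 * takes effect once __pkvm_prot_finalize() runs on each CPU, sets HCR_EL2.VM
 * and loads the stage-2 context.
 */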
163
164 static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
165 enum kvm_pgtable_prot prot)
166 {
167 return true;
168 }
169
170 static void *guest_s2_zalloc_pages_exact(size_t size)
171 {
172 void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));
173
174 WARN_ON(size != (PAGE_SIZE << get_order(size)));
175 hyp_split_page(hyp_virt_to_page(addr));
176
177 return addr;
178 }
179
180 static void guest_s2_free_pages_exact(void *addr, unsigned long size)
181 {
182 u8 order = get_order(size);
183 unsigned int i;
184
185 for (i = 0; i < (1 << order); i++)
186 hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
187 }
188
189 static void *guest_s2_zalloc_page(void *mc)
190 {
191 struct hyp_page *p;
192 void *addr;
193
194 addr = hyp_alloc_pages(&current_vm->pool, 0);
195 if (addr)
196 return addr;
197
198 addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
199 if (!addr)
200 return addr;
201
202 memset(addr, 0, PAGE_SIZE);
203 p = hyp_virt_to_page(addr);
204 memset(p, 0, sizeof(*p));
205 p->refcount = 1;
206
207 return addr;
208 }
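/*
 * Guest stage-2 table pages come from the per-VM pool first and only fall
 * back to the host-provided memcache (@mc) when the pool is empty, in which
 * case the page and its hyp_page metadata are scrubbed by hand since they
 * did not go through the pool allocator.
 */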
209
210 static void guest_s2_get_page(void *addr)
211 {
212 hyp_get_page(&current_vm->pool, addr);
213 }
214
215 static void guest_s2_put_page(void *addr)
216 {
217 hyp_put_page(&current_vm->pool, addr);
218 }
219
220 static void clean_dcache_guest_page(void *va, size_t size)
221 {
222 __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
223 hyp_fixmap_unmap();
224 }
225
226 static void invalidate_icache_guest_page(void *va, size_t size)
227 {
228 __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
229 hyp_fixmap_unmap();
230 }
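/*
 * Both CMO helpers above access the guest page through a short-lived hyp
 * fixmap slot rather than through the linear-map alias of @va, presumably so
 * that the maintenance does not depend on the page still being mapped in the
 * EL2 linear map.
 */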
231
232 int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
233 {
234 struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
235 unsigned long nr_pages;
236 int ret;
237
238 nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
239 ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
240 if (ret)
241 return ret;
242
243 hyp_spin_lock_init(&vm->lock);
244 vm->mm_ops = (struct kvm_pgtable_mm_ops) {
245 .zalloc_pages_exact = guest_s2_zalloc_pages_exact,
246 .free_pages_exact = guest_s2_free_pages_exact,
247 .zalloc_page = guest_s2_zalloc_page,
248 .phys_to_virt = hyp_phys_to_virt,
249 .virt_to_phys = hyp_virt_to_phys,
250 .page_count = hyp_page_count,
251 .get_page = guest_s2_get_page,
252 .put_page = guest_s2_put_page,
253 .dcache_clean_inval_poc = clean_dcache_guest_page,
254 .icache_inval_pou = invalidate_icache_guest_page,
255 };
256
257 guest_lock_component(vm);
258 ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
259 guest_stage2_force_pte_cb);
260 guest_unlock_component(vm);
261 if (ret)
262 return ret;
263
264 vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);
265
266 return 0;
267 }
268
269 void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
270 {
271 void *addr;
272
273 /* Dump all pgtable pages in the hyp_pool */
274 guest_lock_component(vm);
275 kvm_pgtable_stage2_destroy(&vm->pgt);
276 vm->kvm.arch.mmu.pgd_phys = 0ULL;
277 guest_unlock_component(vm);
278
279 /* Drain the hyp_pool into the memcache */
280 addr = hyp_alloc_pages(&vm->pool, 0);
281 while (addr) {
282 memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
283 push_hyp_memcache(mc, addr, hyp_virt_to_phys);
284 WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
285 addr = hyp_alloc_pages(&vm->pool, 0);
286 }
287 }
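/*
 * Teardown flow implemented above: destroy the guest page-table, then, for
 * every page left in the per-VM pool, scrub its hyp_page metadata, push it
 * onto the host memcache and hand it back via __pkvm_hyp_donate_host(), one
 * page at a time.
 */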
288
289 int __pkvm_prot_finalize(void)
290 {
291 struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
292 struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
293
294 if (params->hcr_el2 & HCR_VM)
295 return -EPERM;
296
297 params->vttbr = kvm_get_vttbr(mmu);
298 params->vtcr = host_mmu.arch.vtcr;
299 params->hcr_el2 |= HCR_VM;
300
301 /*
302 * The CMO below not only cleans the updated params to the
303 * PoC, but also provides the DSB that ensures ongoing
304 * page-table walks that have started before we trapped to EL2
305 * have completed.
306 */
307 kvm_flush_dcache_to_poc(params, sizeof(*params));
308
309 write_sysreg(params->hcr_el2, hcr_el2);
310 __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
311
312 /*
313 * Make sure to have an ISB before the TLB maintenance below but only
314 * when __load_stage2() doesn't include one already.
315 */
316 asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
317
318 /* Invalidate stale HCR bits that may be cached in TLBs */
319 __tlbi(vmalls12e1);
320 dsb(nsh);
321 isb();
322
323 return 0;
324 }
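/*
 * This is expected to run once on each CPU: kvm_init_params is per-CPU, and
 * the HCR_VM check above turns a repeated call on the same CPU into -EPERM
 * instead of silently reprogramming VTTBR/VTCR.
 */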
325
326 static int host_stage2_unmap_dev_all(void)
327 {
328 struct kvm_pgtable *pgt = &host_mmu.pgt;
329 struct memblock_region *reg;
330 u64 addr = 0;
331 int i, ret;
332
333 /* Unmap all non-memory regions to recycle the pages */
334 for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
335 reg = &hyp_memory[i];
336 ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
337 if (ret)
338 return ret;
339 }
340 return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
341 }
342
343 struct kvm_mem_range {
344 u64 start;
345 u64 end;
346 };
347
348 static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
349 {
350 int cur, left = 0, right = hyp_memblock_nr;
351 struct memblock_region *reg;
352 phys_addr_t end;
353
354 range->start = 0;
355 range->end = ULONG_MAX;
356
357 /* The list of memblock regions is sorted, binary search it */
358 while (left < right) {
359 cur = (left + right) >> 1;
360 reg = &hyp_memory[cur];
361 end = reg->base + reg->size;
362 if (addr < reg->base) {
363 right = cur;
364 range->end = reg->base;
365 } else if (addr >= end) {
366 left = cur + 1;
367 range->start = end;
368 } else {
369 range->start = reg->base;
370 range->end = end;
371 return reg;
372 }
373 }
374
375 return NULL;
376 }
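/*
 * Worked example (illustrative addresses): with two sorted memblocks
 * [0x80000000, 0x88000000) and [0x90000000, 0x98000000), looking up
 * 0x8c000000 matches neither region, so the function returns NULL and
 * *range is narrowed to the MMIO hole [0x88000000, 0x90000000) between them.
 */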
377
378 bool addr_is_memory(phys_addr_t phys)
379 {
380 struct kvm_mem_range range;
381
382 return !!find_mem_range(phys, &range);
383 }
384
385 static bool addr_is_allowed_memory(phys_addr_t phys)
386 {
387 struct memblock_region *reg;
388 struct kvm_mem_range range;
389
390 reg = find_mem_range(phys, &range);
391
392 return reg && !(reg->flags & MEMBLOCK_NOMAP);
393 }
394
395 static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
396 {
397 return range->start <= addr && addr < range->end;
398 }
399
400 static bool range_is_memory(u64 start, u64 end)
401 {
402 struct kvm_mem_range r;
403
404 if (!find_mem_range(start, &r))
405 return false;
406
407 return is_in_mem_range(end - 1, &r);
408 }
409
410 static inline int __host_stage2_idmap(u64 start, u64 end,
411 enum kvm_pgtable_prot prot)
412 {
413 return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
414 prot, &host_s2_pool, 0);
415 }
416
417 /*
418 * The pool has been provided with enough pages to cover all of memory with
419 * page granularity, but it is difficult to know how much of the MMIO range
420 * we will need to cover upfront, so we may need to 'recycle' the pages if we
421 * run out.
422 */
423 #define host_stage2_try(fn, ...) \
424 ({ \
425 int __ret; \
426 hyp_assert_lock_held(&host_mmu.lock); \
427 __ret = fn(__VA_ARGS__); \
428 if (__ret == -ENOMEM) { \
429 __ret = host_stage2_unmap_dev_all(); \
430 if (!__ret) \
431 __ret = fn(__VA_ARGS__); \
432 } \
433 __ret; \
434 })
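/*
 * Typical use, as in host_stage2_idmap_locked() below (host_mmu.lock held):
 *
 *	ret = host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
 *
 * i.e. the operation is retried once after recycling the pages backing the
 * MMIO mappings, and only then is -ENOMEM reported to the caller.
 */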
435
436 static inline bool range_included(struct kvm_mem_range *child,
437 struct kvm_mem_range *parent)
438 {
439 return parent->start <= child->start && child->end <= parent->end;
440 }
441
442 static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
443 {
444 struct kvm_mem_range cur;
445 kvm_pte_t pte;
446 u32 level;
447 int ret;
448
449 hyp_assert_lock_held(&host_mmu.lock);
450 ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
451 if (ret)
452 return ret;
453
454 if (kvm_pte_valid(pte))
455 return -EAGAIN;
456
457 if (pte)
458 return -EPERM;
459
460 do {
461 u64 granule = kvm_granule_size(level);
462 cur.start = ALIGN_DOWN(addr, granule);
463 cur.end = cur.start + granule;
464 level++;
465 } while ((level < KVM_PGTABLE_MAX_LEVELS) &&
466 !(kvm_level_supports_block_mapping(level) &&
467 range_included(&cur, range)));
468
469 *range = cur;
470
471 return 0;
472 }
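/*
 * In short: the caller passes in the memory region (or MMIO hole) returned
 * by find_mem_range(), and the loop above essentially shrinks it to the
 * largest granule-aligned block around @addr that still fits inside it and
 * does not overlap an existing mapping, so that host_stage2_idmap() can
 * install block mappings whenever possible.
 */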
473
474 int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
475 enum kvm_pgtable_prot prot)
476 {
477 return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
478 }
479
480 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
481 {
482 return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
483 addr, size, &host_s2_pool, owner_id);
484 }
485
486 static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
487 {
488 /*
489 * Block mappings must be used with care in the host stage-2 as a
490 * kvm_pgtable_stage2_map() operation targeting a page in the range of
491 * an existing block will delete the block under the assumption that
492 * mappings in the rest of the block range can always be rebuilt lazily.
493 * That assumption is correct for the host stage-2 with RWX mappings
494 * targeting memory or RW mappings targeting MMIO ranges (see
495 * host_stage2_idmap() below which implements some of the host memory
496 * abort logic). However, this is not safe for any other mappings where
497 * the host stage-2 page-table is in fact the only place where this
498 * state is stored. In all those cases, it is safer to use page-level
499 * mappings, hence avoiding to lose the state because of side-effects in
500 * kvm_pgtable_stage2_map().
501 */
502 if (range_is_memory(addr, end))
503 return prot != PKVM_HOST_MEM_PROT;
504 else
505 return prot != PKVM_HOST_MMIO_PROT;
506 }
507
508 static int host_stage2_idmap(u64 addr)
509 {
510 struct kvm_mem_range range;
511 bool is_memory = !!find_mem_range(addr, &range);
512 enum kvm_pgtable_prot prot;
513 int ret;
514
515 prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
516
517 host_lock_component();
518 ret = host_stage2_adjust_range(addr, &range);
519 if (ret)
520 goto unlock;
521
522 ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
523 unlock:
524 host_unlock_component();
525
526 return ret;
527 }
528
529 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
530 {
531 struct kvm_vcpu_fault_info fault;
532 u64 esr, addr;
533 int ret = 0;
534
535 esr = read_sysreg_el2(SYS_ESR);
536 BUG_ON(!__get_fault_info(esr, &fault));
537
538 addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
539 ret = host_stage2_idmap(addr);
540 BUG_ON(ret && ret != -EAGAIN);
541 }
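/*
 * HPFAR_EL2 reports the faulting IPA as a page number held at bit 4 of the
 * register, so masking and shifting left by 8 above recovers the
 * page-aligned fault address. -EAGAIN from host_stage2_idmap() is tolerated
 * since it indicates the page is already mapped (possibly by a concurrent
 * fault on another CPU) and the access can simply be retried.
 */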
542
543 struct pkvm_mem_transition {
544 u64 nr_pages;
545
546 struct {
547 enum pkvm_component_id id;
548 /* Address in the initiator's address space */
549 u64 addr;
550
551 union {
552 struct {
553 /* Address in the completer's address space */
554 u64 completer_addr;
555 } host;
556 struct {
557 u64 completer_addr;
558 } hyp;
559 };
560 } initiator;
561
562 struct {
563 enum pkvm_component_id id;
564 } completer;
565 };
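/*
 * Every share/unshare/donate operation below is phrased as such a
 * transition: a "request"/"initiate" step performed on behalf of the
 * initiator, and an "ack"/"complete" step performed on behalf of the
 * completer, each checking or updating the page state tracked in the
 * corresponding page-tables. See __pkvm_host_share_hyp() for a typical way
 * these structs are filled in.
 */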
566
567 struct pkvm_mem_share {
568 const struct pkvm_mem_transition tx;
569 const enum kvm_pgtable_prot completer_prot;
570 };
571
572 struct pkvm_mem_donation {
573 const struct pkvm_mem_transition tx;
574 };
575
576 struct check_walk_data {
577 enum pkvm_page_state desired;
578 enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
579 };
580
581 static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
582 enum kvm_pgtable_walk_flags visit)
583 {
584 struct check_walk_data *d = ctx->arg;
585
586 return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
587 }
588
589 static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
590 struct check_walk_data *data)
591 {
592 struct kvm_pgtable_walker walker = {
593 .cb = __check_page_state_visitor,
594 .arg = data,
595 .flags = KVM_PGTABLE_WALK_LEAF,
596 };
597
598 return kvm_pgtable_walk(pgt, addr, size, &walker);
599 }
600
601 static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
602 {
603 if (!addr_is_allowed_memory(addr))
604 return PKVM_NOPAGE;
605
606 if (!kvm_pte_valid(pte) && pte)
607 return PKVM_NOPAGE;
608
609 return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
610 }
611
612 static int __host_check_page_state_range(u64 addr, u64 size,
613 enum pkvm_page_state state)
614 {
615 struct check_walk_data d = {
616 .desired = state,
617 .get_page_state = host_get_page_state,
618 };
619
620 hyp_assert_lock_held(&host_mmu.lock);
621 return check_page_state_range(&host_mmu.pgt, addr, size, &d);
622 }
623
624 static int __host_set_page_state_range(u64 addr, u64 size,
625 enum pkvm_page_state state)
626 {
627 enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
628
629 return host_stage2_idmap_locked(addr, size, prot);
630 }
631
632 static int host_request_owned_transition(u64 *completer_addr,
633 const struct pkvm_mem_transition *tx)
634 {
635 u64 size = tx->nr_pages * PAGE_SIZE;
636 u64 addr = tx->initiator.addr;
637
638 *completer_addr = tx->initiator.host.completer_addr;
639 return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
640 }
641
642 static int host_request_unshare(u64 *completer_addr,
643 const struct pkvm_mem_transition *tx)
644 {
645 u64 size = tx->nr_pages * PAGE_SIZE;
646 u64 addr = tx->initiator.addr;
647
648 *completer_addr = tx->initiator.host.completer_addr;
649 return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
650 }
651
652 static int host_initiate_share(u64 *completer_addr,
653 const struct pkvm_mem_transition *tx)
654 {
655 u64 size = tx->nr_pages * PAGE_SIZE;
656 u64 addr = tx->initiator.addr;
657
658 *completer_addr = tx->initiator.host.completer_addr;
659 return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
660 }
661
662 static int host_initiate_unshare(u64 *completer_addr,
663 const struct pkvm_mem_transition *tx)
664 {
665 u64 size = tx->nr_pages * PAGE_SIZE;
666 u64 addr = tx->initiator.addr;
667
668 *completer_addr = tx->initiator.host.completer_addr;
669 return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
670 }
671
672 static int host_initiate_donation(u64 *completer_addr,
673 const struct pkvm_mem_transition *tx)
674 {
675 u8 owner_id = tx->completer.id;
676 u64 size = tx->nr_pages * PAGE_SIZE;
677
678 *completer_addr = tx->initiator.host.completer_addr;
679 return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id);
680 }
681
682 static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
683 {
684 return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
685 tx->initiator.id != PKVM_ID_HYP);
686 }
687
688 static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx,
689 enum pkvm_page_state state)
690 {
691 u64 size = tx->nr_pages * PAGE_SIZE;
692
693 if (__host_ack_skip_pgtable_check(tx))
694 return 0;
695
696 return __host_check_page_state_range(addr, size, state);
697 }
698
699 static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
700 {
701 return __host_ack_transition(addr, tx, PKVM_NOPAGE);
702 }
703
704 static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx)
705 {
706 u64 size = tx->nr_pages * PAGE_SIZE;
707 u8 host_id = tx->completer.id;
708
709 return host_stage2_set_owner_locked(addr, size, host_id);
710 }
711
712 static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
713 {
714 if (!kvm_pte_valid(pte))
715 return PKVM_NOPAGE;
716
717 return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
718 }
719
720 static int __hyp_check_page_state_range(u64 addr, u64 size,
721 enum pkvm_page_state state)
722 {
723 struct check_walk_data d = {
724 .desired = state,
725 .get_page_state = hyp_get_page_state,
726 };
727
728 hyp_assert_lock_held(&pkvm_pgd_lock);
729 return check_page_state_range(&pkvm_pgtable, addr, size, &d);
730 }
731
732 static int hyp_request_donation(u64 *completer_addr,
733 const struct pkvm_mem_transition *tx)
734 {
735 u64 size = tx->nr_pages * PAGE_SIZE;
736 u64 addr = tx->initiator.addr;
737
738 *completer_addr = tx->initiator.hyp.completer_addr;
739 return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
740 }
741
742 static int hyp_initiate_donation(u64 *completer_addr,
743 const struct pkvm_mem_transition *tx)
744 {
745 u64 size = tx->nr_pages * PAGE_SIZE;
746 int ret;
747
748 *completer_addr = tx->initiator.hyp.completer_addr;
749 ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size);
750 return (ret != size) ? -EFAULT : 0;
751 }
752
753 static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
754 {
755 return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
756 tx->initiator.id != PKVM_ID_HOST);
757 }
758
759 static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
760 enum kvm_pgtable_prot perms)
761 {
762 u64 size = tx->nr_pages * PAGE_SIZE;
763
764 if (perms != PAGE_HYP)
765 return -EPERM;
766
767 if (__hyp_ack_skip_pgtable_check(tx))
768 return 0;
769
770 return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
771 }
772
773 static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
774 {
775 u64 size = tx->nr_pages * PAGE_SIZE;
776
777 if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr))
778 return -EBUSY;
779
780 if (__hyp_ack_skip_pgtable_check(tx))
781 return 0;
782
783 return __hyp_check_page_state_range(addr, size,
784 PKVM_PAGE_SHARED_BORROWED);
785 }
786
787 static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
788 {
789 u64 size = tx->nr_pages * PAGE_SIZE;
790
791 if (__hyp_ack_skip_pgtable_check(tx))
792 return 0;
793
794 return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
795 }
796
797 static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
798 enum kvm_pgtable_prot perms)
799 {
800 void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
801 enum kvm_pgtable_prot prot;
802
803 prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
804 return pkvm_create_mappings_locked(start, end, prot);
805 }
806
807 static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
808 {
809 u64 size = tx->nr_pages * PAGE_SIZE;
810 int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);
811
812 return (ret != size) ? -EFAULT : 0;
813 }
814
815 static int hyp_complete_donation(u64 addr,
816 const struct pkvm_mem_transition *tx)
817 {
818 void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
819 enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
820
821 return pkvm_create_mappings_locked(start, end, prot);
822 }
823
824 static int check_share(struct pkvm_mem_share *share)
825 {
826 const struct pkvm_mem_transition *tx = &share->tx;
827 u64 completer_addr;
828 int ret;
829
830 switch (tx->initiator.id) {
831 case PKVM_ID_HOST:
832 ret = host_request_owned_transition(&completer_addr, tx);
833 break;
834 default:
835 ret = -EINVAL;
836 }
837
838 if (ret)
839 return ret;
840
841 switch (tx->completer.id) {
842 case PKVM_ID_HYP:
843 ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
844 break;
845 case PKVM_ID_FFA:
846 /*
847 * We only check the host; the secure side will check the other
848 * end when we forward the FFA call.
849 */
850 ret = 0;
851 break;
852 default:
853 ret = -EINVAL;
854 }
855
856 return ret;
857 }
858
859 static int __do_share(struct pkvm_mem_share *share)
860 {
861 const struct pkvm_mem_transition *tx = &share->tx;
862 u64 completer_addr;
863 int ret;
864
865 switch (tx->initiator.id) {
866 case PKVM_ID_HOST:
867 ret = host_initiate_share(&completer_addr, tx);
868 break;
869 default:
870 ret = -EINVAL;
871 }
872
873 if (ret)
874 return ret;
875
876 switch (tx->completer.id) {
877 case PKVM_ID_HYP:
878 ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
879 break;
880 case PKVM_ID_FFA:
881 /*
882 * We're not responsible for any secure page-tables, so there's
883 * nothing to do here.
884 */
885 ret = 0;
886 break;
887 default:
888 ret = -EINVAL;
889 }
890
891 return ret;
892 }
893
894 /*
895 * do_share():
896 *
897 * The page owner grants access to another component with a given set
898 * of permissions.
899 *
900 * Initiator: OWNED => SHARED_OWNED
901 * Completer: NOPAGE => SHARED_BORROWED
902 */
903 static int do_share(struct pkvm_mem_share *share)
904 {
905 int ret;
906
907 ret = check_share(share);
908 if (ret)
909 return ret;
910
911 return WARN_ON(__do_share(share));
912 }
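/*
 * The WARN_ON() above encodes the expectation that, once check_share() has
 * succeeded under the relevant component locks, the page-table updates in
 * __do_share() cannot fail; a failure at that point would leave the
 * initiator and completer views of the page state out of sync.
 */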
913
914 static int check_unshare(struct pkvm_mem_share *share)
915 {
916 const struct pkvm_mem_transition *tx = &share->tx;
917 u64 completer_addr;
918 int ret;
919
920 switch (tx->initiator.id) {
921 case PKVM_ID_HOST:
922 ret = host_request_unshare(&completer_addr, tx);
923 break;
924 default:
925 ret = -EINVAL;
926 }
927
928 if (ret)
929 return ret;
930
931 switch (tx->completer.id) {
932 case PKVM_ID_HYP:
933 ret = hyp_ack_unshare(completer_addr, tx);
934 break;
935 case PKVM_ID_FFA:
936 /* See check_share() */
937 ret = 0;
938 break;
939 default:
940 ret = -EINVAL;
941 }
942
943 return ret;
944 }
945
946 static int __do_unshare(struct pkvm_mem_share *share)
947 {
948 const struct pkvm_mem_transition *tx = &share->tx;
949 u64 completer_addr;
950 int ret;
951
952 switch (tx->initiator.id) {
953 case PKVM_ID_HOST:
954 ret = host_initiate_unshare(&completer_addr, tx);
955 break;
956 default:
957 ret = -EINVAL;
958 }
959
960 if (ret)
961 return ret;
962
963 switch (tx->completer.id) {
964 case PKVM_ID_HYP:
965 ret = hyp_complete_unshare(completer_addr, tx);
966 break;
967 case PKVM_ID_FFA:
968 /* See __do_share() */
969 ret = 0;
970 break;
971 default:
972 ret = -EINVAL;
973 }
974
975 return ret;
976 }
977
978 /*
979 * do_unshare():
980 *
981 * The page owner revokes access from another component for a range of
982 * pages which were previously shared using do_share().
983 *
984 * Initiator: SHARED_OWNED => OWNED
985 * Completer: SHARED_BORROWED => NOPAGE
986 */
987 static int do_unshare(struct pkvm_mem_share *share)
988 {
989 int ret;
990
991 ret = check_unshare(share);
992 if (ret)
993 return ret;
994
995 return WARN_ON(__do_unshare(share));
996 }
997
998 static int check_donation(struct pkvm_mem_donation *donation)
999 {
1000 const struct pkvm_mem_transition *tx = &donation->tx;
1001 u64 completer_addr;
1002 int ret;
1003
1004 switch (tx->initiator.id) {
1005 case PKVM_ID_HOST:
1006 ret = host_request_owned_transition(&completer_addr, tx);
1007 break;
1008 case PKVM_ID_HYP:
1009 ret = hyp_request_donation(&completer_addr, tx);
1010 break;
1011 default:
1012 ret = -EINVAL;
1013 }
1014
1015 if (ret)
1016 return ret;
1017
1018 switch (tx->completer.id) {
1019 case PKVM_ID_HOST:
1020 ret = host_ack_donation(completer_addr, tx);
1021 break;
1022 case PKVM_ID_HYP:
1023 ret = hyp_ack_donation(completer_addr, tx);
1024 break;
1025 default:
1026 ret = -EINVAL;
1027 }
1028
1029 return ret;
1030 }
1031
1032 static int __do_donate(struct pkvm_mem_donation *donation)
1033 {
1034 const struct pkvm_mem_transition *tx = &donation->tx;
1035 u64 completer_addr;
1036 int ret;
1037
1038 switch (tx->initiator.id) {
1039 case PKVM_ID_HOST:
1040 ret = host_initiate_donation(&completer_addr, tx);
1041 break;
1042 case PKVM_ID_HYP:
1043 ret = hyp_initiate_donation(&completer_addr, tx);
1044 break;
1045 default:
1046 ret = -EINVAL;
1047 }
1048
1049 if (ret)
1050 return ret;
1051
1052 switch (tx->completer.id) {
1053 case PKVM_ID_HOST:
1054 ret = host_complete_donation(completer_addr, tx);
1055 break;
1056 case PKVM_ID_HYP:
1057 ret = hyp_complete_donation(completer_addr, tx);
1058 break;
1059 default:
1060 ret = -EINVAL;
1061 }
1062
1063 return ret;
1064 }
1065
1066 /*
1067 * do_donate():
1068 *
1069 * The page owner transfers ownership to another component, losing access
1070 * as a consequence.
1071 *
1072 * Initiator: OWNED => NOPAGE
1073 * Completer: NOPAGE => OWNED
1074 */
1075 static int do_donate(struct pkvm_mem_donation *donation)
1076 {
1077 int ret;
1078
1079 ret = check_donation(donation);
1080 if (ret)
1081 return ret;
1082
1083 return WARN_ON(__do_donate(donation));
1084 }
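/*
 * Note the asymmetry between the two initiators: the host records the new
 * owner in its stage-2 via host_stage2_set_owner_locked(), whereas the hyp
 * simply unmaps the donated range from its own page-table in
 * hyp_initiate_donation().
 */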
1085
1086 int __pkvm_host_share_hyp(u64 pfn)
1087 {
1088 int ret;
1089 u64 host_addr = hyp_pfn_to_phys(pfn);
1090 u64 hyp_addr = (u64)__hyp_va(host_addr);
1091 struct pkvm_mem_share share = {
1092 .tx = {
1093 .nr_pages = 1,
1094 .initiator = {
1095 .id = PKVM_ID_HOST,
1096 .addr = host_addr,
1097 .host = {
1098 .completer_addr = hyp_addr,
1099 },
1100 },
1101 .completer = {
1102 .id = PKVM_ID_HYP,
1103 },
1104 },
1105 .completer_prot = PAGE_HYP,
1106 };
1107
1108 host_lock_component();
1109 hyp_lock_component();
1110
1111 ret = do_share(&share);
1112
1113 hyp_unlock_component();
1114 host_unlock_component();
1115
1116 return ret;
1117 }
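/*
 * Lock ordering used by all host<->hyp transitions in this file: the host
 * component lock is taken before the hyp component lock, and they are
 * released in the reverse order.
 */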
1118
1119 int __pkvm_host_unshare_hyp(u64 pfn)
1120 {
1121 int ret;
1122 u64 host_addr = hyp_pfn_to_phys(pfn);
1123 u64 hyp_addr = (u64)__hyp_va(host_addr);
1124 struct pkvm_mem_share share = {
1125 .tx = {
1126 .nr_pages = 1,
1127 .initiator = {
1128 .id = PKVM_ID_HOST,
1129 .addr = host_addr,
1130 .host = {
1131 .completer_addr = hyp_addr,
1132 },
1133 },
1134 .completer = {
1135 .id = PKVM_ID_HYP,
1136 },
1137 },
1138 .completer_prot = PAGE_HYP,
1139 };
1140
1141 host_lock_component();
1142 hyp_lock_component();
1143
1144 ret = do_unshare(&share);
1145
1146 hyp_unlock_component();
1147 host_unlock_component();
1148
1149 return ret;
1150 }
1151
1152 int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
1153 {
1154 int ret;
1155 u64 host_addr = hyp_pfn_to_phys(pfn);
1156 u64 hyp_addr = (u64)__hyp_va(host_addr);
1157 struct pkvm_mem_donation donation = {
1158 .tx = {
1159 .nr_pages = nr_pages,
1160 .initiator = {
1161 .id = PKVM_ID_HOST,
1162 .addr = host_addr,
1163 .host = {
1164 .completer_addr = hyp_addr,
1165 },
1166 },
1167 .completer = {
1168 .id = PKVM_ID_HYP,
1169 },
1170 },
1171 };
1172
1173 host_lock_component();
1174 hyp_lock_component();
1175
1176 ret = do_donate(&donation);
1177
1178 hyp_unlock_component();
1179 host_unlock_component();
1180
1181 return ret;
1182 }
1183
1184 int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
1185 {
1186 int ret;
1187 u64 host_addr = hyp_pfn_to_phys(pfn);
1188 u64 hyp_addr = (u64)__hyp_va(host_addr);
1189 struct pkvm_mem_donation donation = {
1190 .tx = {
1191 .nr_pages = nr_pages,
1192 .initiator = {
1193 .id = PKVM_ID_HYP,
1194 .addr = hyp_addr,
1195 .hyp = {
1196 .completer_addr = host_addr,
1197 },
1198 },
1199 .completer = {
1200 .id = PKVM_ID_HOST,
1201 },
1202 },
1203 };
1204
1205 host_lock_component();
1206 hyp_lock_component();
1207
1208 ret = do_donate(&donation);
1209
1210 hyp_unlock_component();
1211 host_unlock_component();
1212
1213 return ret;
1214 }
1215
1216 int hyp_pin_shared_mem(void *from, void *to)
1217 {
1218 u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
1219 u64 end = PAGE_ALIGN((u64)to);
1220 u64 size = end - start;
1221 int ret;
1222
1223 host_lock_component();
1224 hyp_lock_component();
1225
1226 ret = __host_check_page_state_range(__hyp_pa(start), size,
1227 PKVM_PAGE_SHARED_OWNED);
1228 if (ret)
1229 goto unlock;
1230
1231 ret = __hyp_check_page_state_range(start, size,
1232 PKVM_PAGE_SHARED_BORROWED);
1233 if (ret)
1234 goto unlock;
1235
1236 for (cur = start; cur < end; cur += PAGE_SIZE)
1237 hyp_page_ref_inc(hyp_virt_to_page(cur));
1238
1239 unlock:
1240 hyp_unlock_component();
1241 host_unlock_component();
1242
1243 return ret;
1244 }
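/*
 * Minimal usage sketch (illustrative): pinning prevents a host unshare from
 * succeeding (hyp_ack_unshare() returns -EBUSY while the refcount is
 * elevated), so accesses to host-shared data are typically bracketed as:
 *
 *	if (!hyp_pin_shared_mem(ptr, ptr + size)) {
 *		... access the shared pages ...
 *		hyp_unpin_shared_mem(ptr, ptr + size);
 *	}
 */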
1245
1246 void hyp_unpin_shared_mem(void *from, void *to)
1247 {
1248 u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
1249 u64 end = PAGE_ALIGN((u64)to);
1250
1251 host_lock_component();
1252 hyp_lock_component();
1253
1254 for (cur = start; cur < end; cur += PAGE_SIZE)
1255 hyp_page_ref_dec(hyp_virt_to_page(cur));
1256
1257 hyp_unlock_component();
1258 host_unlock_component();
1259 }
1260
1261 int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
1262 {
1263 int ret;
1264 struct pkvm_mem_share share = {
1265 .tx = {
1266 .nr_pages = nr_pages,
1267 .initiator = {
1268 .id = PKVM_ID_HOST,
1269 .addr = hyp_pfn_to_phys(pfn),
1270 },
1271 .completer = {
1272 .id = PKVM_ID_FFA,
1273 },
1274 },
1275 };
1276
1277 host_lock_component();
1278 ret = do_share(&share);
1279 host_unlock_component();
1280
1281 return ret;
1282 }
1283
1284 int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
1285 {
1286 int ret;
1287 struct pkvm_mem_share share = {
1288 .tx = {
1289 .nr_pages = nr_pages,
1290 .initiator = {
1291 .id = PKVM_ID_HOST,
1292 .addr = hyp_pfn_to_phys(pfn),
1293 },
1294 .completer = {
1295 .id = PKVM_ID_FFA,
1296 },
1297 },
1298 };
1299
1300 host_lock_component();
1301 ret = do_unshare(&share);
1302 host_unlock_component();
1303
1304 return ret;
1305 }
1306