// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2021 Intel Corporation. */

#include <asm/sgx.h>

#include "cpuid.h"
#include "kvm_cache_regs.h"
#include "nested.h"
#include "sgx.h"
#include "vmx.h"
#include "x86.h"

bool __read_mostly enable_sgx = 1;
module_param_named(sgx, enable_sgx, bool, 0444);

/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
static u64 sgx_pubkey_hash[4] __ro_after_init;

/*
 * ENCLS's memory operands use a fixed segment (DS) and a fixed
 * address size based on the mode. Related prefixes are ignored.
 */
static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
			     int size, int alignment, gva_t *gva)
{
	struct kvm_segment s;
	bool fault;

	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
	*gva = offset;
	if (!is_long_mode(vcpu)) {
		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
		*gva += s.base;
	}

	if (!IS_ALIGNED(*gva, alignment)) {
		fault = true;
	} else if (likely(is_long_mode(vcpu))) {
		fault = is_noncanonical_address(*gva, vcpu);
	} else {
		*gva &= 0xffffffff;
		fault = (s.unusable) ||
			(s.type != 2 && s.type != 3) ||
			(*gva > s.limit) ||
			((s.base != 0 || s.limit != 0xffffffff) &&
			(((u64)*gva + size - 1) > s.limit + 1));
	}
	if (fault)
		kvm_inject_gp(vcpu, 0);
	return fault ? -EINVAL : 0;
}

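/*
 * Illustrative example, not kernel code: in a 32-bit guest with
 * DS.base = 0x1000 and RBX = 0x2000, an ENCLS memory operand in RBX
 * resolves to GVA 0x3000 in sgx_get_encls_gva() and must then pass the
 * alignment, segment type and limit checks above.  In 64-bit mode the
 * DS base is ignored and only the alignment and canonicality checks
 * apply.
 */
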
static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
					 unsigned int size)
{
	uint64_t data[2] = { addr, size };

	__kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
}

static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
			unsigned int size)
{
	if (__copy_from_user(data, (void __user *)hva, size)) {
		sgx_handle_emulation_failure(vcpu, hva, size);
		return -EFAULT;
	}

	return 0;
}

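/*
 * Note on sgx_read_hva(): __copy_from_user() is usable here without an
 * explicit access_ok() because the HVA is derived from a memslot, and
 * KVM validates memslot userspace addresses at memslot creation; a
 * stale or unmapped HVA simply faults and is reported to userspace.
 */
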
static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
			  gpa_t *gpa)
{
	struct x86_exception ex;

	if (write)
		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
	else
		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);

	if (*gpa == UNMAPPED_GVA) {
		kvm_inject_emulated_page_fault(vcpu, &ex);
		return -EFAULT;
	}

	return 0;
}

static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
{
	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
	if (kvm_is_error_hva(*hva)) {
		sgx_handle_emulation_failure(vcpu, gpa, 1);
		return -EFAULT;
	}

	*hva |= gpa & ~PAGE_MASK;

	return 0;
}

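/*
 * Taken together, sgx_gva_to_gpa() and sgx_gpa_to_hva() implement the
 * two-step translation used by the ENCLS handlers below.  Sketch of
 * the calling convention, mirroring handle_encls_ecreate():
 *
 *	gpa_t gpa;
 *	unsigned long hva;
 *
 *	if (sgx_gva_to_gpa(vcpu, gva, false, &gpa))
 *		return 1;	// #PF injected, resume the guest
 *	if (sgx_gpa_to_hva(vcpu, gpa, &hva))
 *		return 0;	// bad GPA, exit to userspace
 */
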
static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
{
	struct x86_exception ex;

	/*
	 * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
	 */
	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	/*
	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
	 * #PF on SGX2).  The assumption is that EPCM faults are much more
	 * likely than a bad userspace address.
	 */
	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
	    guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
		memset(&ex, 0, sizeof(ex));
		ex.vector = PF_VECTOR;
		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
				PFERR_SGX_MASK;
		ex.address = gva;
		ex.error_code_valid = true;
		ex.nested_page_fault = false;
		kvm_inject_page_fault(vcpu, &ex);
	} else {
		kvm_inject_gp(vcpu, 0);
	}
	return 1;
}

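/*
 * For reference, the error code synthesized by sgx_inject_fault() is
 * PFERR_PRESENT_MASK | PFERR_WRITE_MASK | PFERR_SGX_MASK == 0x8003,
 * i.e. a present, write-induced, SGX-signaled #PF, matching how SGX2
 * hardware reports EPCM violations.
 */
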
static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
				  struct sgx_pageinfo *pageinfo,
				  unsigned long secs_hva,
				  gva_t secs_gva)
{
	struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
	u64 attributes, xfrm, size;
	u32 miscselect;
	u8 max_size_log2;
	int trapnr, ret;

	sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
	sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
	if (!sgx_12_0 || !sgx_12_1) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	miscselect = contents->miscselect;
	attributes = contents->attributes;
	xfrm = contents->xfrm;
	size = contents->size;

	/* Enforce restriction of access to the PROVISIONKEY. */
	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
	    (attributes & SGX_ATTR_PROVISIONKEY)) {
		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
			pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n");
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
	if ((u32)miscselect & ~sgx_12_0->ebx ||
	    (u32)attributes & ~sgx_12_1->eax ||
	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
	    (u32)xfrm & ~sgx_12_1->ecx ||
	    (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/*
	 * Enforce CPUID restriction on max enclave size.  Note, the #GP must
	 * also bail, i.e. ECREATE must not be executed with an oversized SECS.
	 */
	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
							    sgx_12_0->edx;
	if (size >= BIT_ULL(max_size_log2)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/*
	 * sgx_virt_ecreate() returns:
	 *  1) 0:	ECREATE was successful
	 *  2) -EFAULT:	ECREATE was run but faulted, and trapnr was set to the
	 *		exception number.
	 *  3) -EINVAL:	access_ok() on @secs_hva failed. This should never
	 *		happen as KVM checks host addresses at memslot creation.
	 *		sgx_virt_ecreate() has already warned in this case.
	 */
	ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
	if (!ret)
		return kvm_skip_emulated_instruction(vcpu);
	if (ret == -EFAULT)
		return sgx_inject_fault(vcpu, secs_gva, trapnr);

	return ret;
}

static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
{
	gva_t pageinfo_gva, secs_gva;
	gva_t metadata_gva, contents_gva;
	gpa_t metadata_gpa, contents_gpa, secs_gpa;
	unsigned long metadata_hva, contents_hva, secs_hva;
	struct sgx_pageinfo pageinfo;
	struct sgx_secs *contents;
	struct x86_exception ex;
	int r;

	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
		return 1;

	/*
	 * Copy the PAGEINFO to local memory, its pointers need to be
	 * translated, i.e. we need to do a deep copy/translate.
	 */
	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
				sizeof(pageinfo), &ex);
	if (r == X86EMUL_PROPAGATE_FAULT) {
		kvm_inject_emulated_page_fault(vcpu, &ex);
		return 1;
	} else if (r != X86EMUL_CONTINUE) {
		sgx_handle_emulation_failure(vcpu, pageinfo_gva,
					     sizeof(pageinfo));
		return 0;
	}

	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
			      &contents_gva))
		return 1;

	/*
	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
	 * Resume the guest on failure to inject a #PF.
	 */
	if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
	    sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
		return 1;

	/*
	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
	 * KVM doesn't have to fully process one address at a time.  Exit to
	 * userspace if a GPA is invalid.
	 */
	if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
	    sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
		return 0;

	/*
	 * Copy contents into kernel memory to prevent TOCTOU attack.  E.g. the
	 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
	 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
	 * enforce restriction of access to the PROVISIONKEY.
	 */
	contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
	if (!contents)
		return -ENOMEM;

	/* Exit to userspace if copying from a host userspace address fails. */
	if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
		free_page((unsigned long)contents);
		return 0;
	}

	pageinfo.metadata = metadata_hva;
	pageinfo.contents = (u64)contents;

	r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);

	free_page((unsigned long)contents);

	return r;
}

static int handle_encls_einit(struct kvm_vcpu *vcpu)
{
	unsigned long sig_hva, secs_hva, token_hva, rflags;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	gva_t sig_gva, secs_gva, token_gva;
	gpa_t sig_gpa, secs_gpa, token_gpa;
	int ret, trapnr;

	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
		return 1;

	/*
	 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
	 * Resume the guest on failure to inject a #PF.
	 */
	if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
	    sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
		return 1;

	/*
	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
	 * KVM doesn't have to fully process one address at a time.  Exit to
	 * userspace if a GPA is invalid.  Note, all structures are aligned and
	 * cannot split pages.
	 */
	if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
	    sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
		return 0;

	ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
			     (void __user *)secs_hva,
			     vmx->msr_ia32_sgxlepubkeyhash, &trapnr);

	if (ret == -EFAULT)
		return sgx_inject_fault(vcpu, secs_gva, trapnr);

	/*
	 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
	 * @token_hva or @secs_hva.  This should never happen as KVM checks host
	 * addresses at memslot creation.  sgx_virt_einit() has already warned
	 * in this case, so just return.
	 */
	if (ret < 0)
		return ret;

	rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
					  X86_EFLAGS_AF | X86_EFLAGS_SF |
					  X86_EFLAGS_OF);
	if (ret)
		rflags |= X86_EFLAGS_ZF;
	else
		rflags &= ~X86_EFLAGS_ZF;
	vmx_set_rflags(vcpu, rflags);

	kvm_rax_write(vcpu, ret);
	return kvm_skip_emulated_instruction(vcpu);
}

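/*
 * Sketch of the EINIT completion convention emulated by
 * handle_encls_einit(): a hardware fault is forwarded via
 * sgx_inject_fault(), while an error code from the leaf itself (e.g.
 * SGX_INVALID_EINITTOKEN) is returned in RAX with RFLAGS.ZF set;
 * success returns 0 in RAX with ZF clear.  CF/PF/AF/SF/OF are always
 * cleared, per the SDM's ENCLS[EINIT] behavior.
 */
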
static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
{
	if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
		return false;

	if (leaf >= ECREATE && leaf <= ETRACK)
		return guest_cpuid_has(vcpu, X86_FEATURE_SGX1);

	if (leaf >= EAUG && leaf <= EMODT)
		return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);

	return false;
}

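/*
 * For reference, the ranges checked in encls_leaf_enabled_in_guest()
 * correspond to the architectural ENCLS leaf numbers: SGX1 spans
 * ECREATE (0x00) through ETRACK (0x0C), and SGX2 spans EAUG (0x0D)
 * through EMODT (0x0F).
 */
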
static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
{
	const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;

	return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
}

int handle_encls(struct kvm_vcpu *vcpu)
{
	u32 leaf = (u32)kvm_rax_read(vcpu);

	if (!encls_leaf_enabled_in_guest(vcpu, leaf)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
	} else if (!sgx_enabled_in_guest_bios(vcpu)) {
		kvm_inject_gp(vcpu, 0);
	} else {
		if (leaf == ECREATE)
			return handle_encls_ecreate(vcpu);
		if (leaf == EINIT)
			return handle_encls_einit(vcpu);
		WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf);
		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
		return 0;
	}
	return 1;
}

void setup_default_sgx_lepubkeyhash(void)
{
	/*
	 * Use Intel's default value for Skylake hardware if Launch Control is
	 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
	 * Launch Control is supported and enabled, i.e. mimic the reset value
	 * and let the guest write the MSRs at will.  If Launch Control is
	 * supported but disabled, then use the current MSR values as the hash
	 * MSRs exist but are read-only (locked and not writable).
	 */
	if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
	    rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
		sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
		sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
		sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
		sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
	} else {
		/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
	}
}

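/*
 * Note: the hardcoded constants in setup_default_sgx_lepubkeyhash() are
 * the SHA-256 digest of Intel's launch enclave signing key, i.e. the
 * architectural reset value of the SGX_LEPUBKEYHASH MSRs on Launch
 * Control hardware.
 */
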
void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
	       sizeof(sgx_pubkey_hash));
}

/*
 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
 * restrictions if the guest's allowed-1 settings diverge from hardware.
 */
static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *guest_cpuid;
	u32 eax, ebx, ecx, edx;

	if (!vcpu->kvm->arch.sgx_provisioning_allowed)
		return true;

	guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 0);
	if (!guest_cpuid)
		return true;

	cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
	if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
		return true;

	guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 1);
	if (!guest_cpuid)
		return true;

	cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
	if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
	    guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
		return true;

	return false;
}

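/*
 * Concrete example of why the intercept is needed: if hardware allows
 * MISCSELECT.EXINFO but the guest's CPUID.0x12.0:EBX hides it, ECREATE
 * must be trapped so that __handle_encls_ecreate() can inject #GP when
 * the guest attempts to set the bit anyway.
 */
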
void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	/*
	 * There is no software enable bit for SGX that is virtualized by
	 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
	 * guest (either by the host or by the guest's BIOS) but enabled in the
	 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
	 * the expected system behavior for ENCLS.
	 */
	u64 bitmap = -1ull;

	/* Nothing to do if hardware doesn't support SGX */
	if (!cpu_has_vmx_encls_vmexit())
		return;

	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
	    sgx_enabled_in_guest_bios(vcpu)) {
		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
			bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
			if (sgx_intercept_encls_ecreate(vcpu))
				bitmap |= (1 << ECREATE);
		}

		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
			bitmap &= ~GENMASK_ULL(EMODT, EAUG);

		/*
		 * Trap and execute EINIT if launch control is enabled in the
		 * host using the guest's values for launch control MSRs, even
		 * if the guest's values are fixed to hardware default values.
		 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
		 * the MSRs is extraordinarily expensive.
		 */
		if (boot_cpu_has(X86_FEATURE_SGX_LC))
			bitmap |= (1 << EINIT);

		if (!vmcs12 && is_guest_mode(vcpu))
			vmcs12 = get_vmcs12(vcpu);
		if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
			bitmap |= vmcs12->encls_exiting_bitmap;
	}
	vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
}
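
/*
 * Resulting intercept policy (sketch): a set bit in the ENCLS-exiting
 * bitmap traps that leaf to KVM.  E.g. for an SGX1 guest on a host with
 * SGX_LC, leafs ECREATE..ETRACK execute in the guest, except EINIT (and
 * possibly ECREATE, see sgx_intercept_encls_ecreate()), which exits so
 * KVM can emulate it with the guest's SGX_LEPUBKEYHASH values; all
 * other leafs stay intercepted and handle_encls() injects #UD for leafs
 * not enabled in the guest's CPUID.
 */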