1 /*
2  * Kernel-based Virtual Machine driver for Linux
3  *
4  * AMD SVM support
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
8  *
9  * Authors:
10  *   Yaniv Kamay  <yaniv@qumranet.com>
11  *   Avi Kivity   <avi@qumranet.com>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2.  See
14  * the COPYING file in the top-level directory.
15  *
16  */
17 #include <linux/kvm_host.h>
18 
19 #include "irq.h"
20 #include "mmu.h"
21 #include "kvm_cache_regs.h"
22 #include "x86.h"
23 
24 #include <linux/module.h>
25 #include <linux/kernel.h>
26 #include <linux/vmalloc.h>
27 #include <linux/highmem.h>
28 #include <linux/sched.h>
29 #include <linux/ftrace_event.h>
30 #include <linux/slab.h>
31 
32 #include <asm/perf_event.h>
33 #include <asm/tlbflush.h>
34 #include <asm/desc.h>
35 #include <asm/kvm_para.h>
36 
37 #include <asm/virtext.h>
38 #include "trace.h"
39 
40 #define __ex(x) __kvm_handle_fault_on_reboot(x)
41 
42 MODULE_AUTHOR("Qumranet");
43 MODULE_LICENSE("GPL");
44 
45 #define IOPM_ALLOC_ORDER 2
46 #define MSRPM_ALLOC_ORDER 1
47 
48 #define SEG_TYPE_LDT 2
49 #define SEG_TYPE_BUSY_TSS16 3
50 
51 #define SVM_FEATURE_NPT            (1 <<  0)
52 #define SVM_FEATURE_LBRV           (1 <<  1)
53 #define SVM_FEATURE_SVML           (1 <<  2)
54 #define SVM_FEATURE_NRIP           (1 <<  3)
55 #define SVM_FEATURE_TSC_RATE       (1 <<  4)
56 #define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
57 #define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
58 #define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
59 #define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
60 
61 #define NESTED_EXIT_HOST	0	/* Exit handled on host level */
62 #define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
63 #define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */
64 
65 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
66 
67 #define TSC_RATIO_RSVD          0xffffff0000000000ULL
68 #define TSC_RATIO_MIN		0x0000000000000001ULL
69 #define TSC_RATIO_MAX		0x000000ffffffffffULL
70 
71 static bool erratum_383_found __read_mostly;
72 
73 static const u32 host_save_user_msrs[] = {
74 #ifdef CONFIG_X86_64
75 	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
76 	MSR_FS_BASE,
77 #endif
78 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
79 };
80 
81 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
82 
83 struct kvm_vcpu;
84 
85 struct nested_state {
86 	struct vmcb *hsave;
87 	u64 hsave_msr;
88 	u64 vm_cr_msr;
89 	u64 vmcb;
90 
91 	/* These are the merged vectors */
92 	u32 *msrpm;
93 
94 	/* gpa pointers to the real vectors */
95 	u64 vmcb_msrpm;
96 	u64 vmcb_iopm;
97 
98 	/* A VMEXIT is required but not yet emulated */
99 	bool exit_required;
100 
101 	/* cache for intercepts of the guest */
102 	u32 intercept_cr;
103 	u32 intercept_dr;
104 	u32 intercept_exceptions;
105 	u64 intercept;
106 
107 	/* Nested Paging related state */
108 	u64 nested_cr3;
109 };
110 
111 #define MSRPM_OFFSETS	16
112 static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
113 
114 /*
115  * Set osvw_len to higher value when updated Revision Guides
116  * are published and we know what the new status bits are
117  */
118 static uint64_t osvw_len = 4, osvw_status;
119 
120 struct vcpu_svm {
121 	struct kvm_vcpu vcpu;
122 	struct vmcb *vmcb;
123 	unsigned long vmcb_pa;
124 	struct svm_cpu_data *svm_data;
125 	uint64_t asid_generation;
126 	uint64_t sysenter_esp;
127 	uint64_t sysenter_eip;
128 
129 	u64 next_rip;
130 
131 	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
132 	struct {
133 		u16 fs;
134 		u16 gs;
135 		u16 ldt;
136 		u64 gs_base;
137 	} host;
138 
139 	u32 *msrpm;
140 
141 	ulong nmi_iret_rip;
142 
143 	struct nested_state nested;
144 
145 	bool nmi_singlestep;
146 
147 	unsigned int3_injected;
148 	unsigned long int3_rip;
149 	u32 apf_reason;
150 
151 	u64  tsc_ratio;
152 };
153 
154 static DEFINE_PER_CPU(u64, current_tsc_ratio);
155 #define TSC_RATIO_DEFAULT	0x0100000000ULL
156 
157 #define MSR_INVALID			0xffffffffU
158 
159 static struct svm_direct_access_msrs {
160 	u32 index;   /* Index of the MSR */
161 	bool always; /* True if intercept is always on */
162 } direct_access_msrs[] = {
163 	{ .index = MSR_STAR,				.always = true  },
164 	{ .index = MSR_IA32_SYSENTER_CS,		.always = true  },
165 #ifdef CONFIG_X86_64
166 	{ .index = MSR_GS_BASE,				.always = true  },
167 	{ .index = MSR_FS_BASE,				.always = true  },
168 	{ .index = MSR_KERNEL_GS_BASE,			.always = true  },
169 	{ .index = MSR_LSTAR,				.always = true  },
170 	{ .index = MSR_CSTAR,				.always = true  },
171 	{ .index = MSR_SYSCALL_MASK,			.always = true  },
172 #endif
173 	{ .index = MSR_IA32_LASTBRANCHFROMIP,		.always = false },
174 	{ .index = MSR_IA32_LASTBRANCHTOIP,		.always = false },
175 	{ .index = MSR_IA32_LASTINTFROMIP,		.always = false },
176 	{ .index = MSR_IA32_LASTINTTOIP,		.always = false },
177 	{ .index = MSR_INVALID,				.always = false },
178 };
179 
180 /* enable NPT for AMD64 and X86 with PAE */
181 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
182 static bool npt_enabled = true;
183 #else
184 static bool npt_enabled;
185 #endif
186 
187 /* allow nested paging (virtualized MMU) for all guests */
188 static int npt = true;
189 module_param(npt, int, S_IRUGO);
190 
191 /* allow nested virtualization in KVM/SVM */
192 static int nested = true;
193 module_param(nested, int, S_IRUGO);
194 
195 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
196 static void svm_complete_interrupts(struct vcpu_svm *svm);
197 
198 static int nested_svm_exit_handled(struct vcpu_svm *svm);
199 static int nested_svm_intercept(struct vcpu_svm *svm);
200 static int nested_svm_vmexit(struct vcpu_svm *svm);
201 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
202 				      bool has_error_code, u32 error_code);
203 static u64 __scale_tsc(u64 ratio, u64 tsc);
204 
/*
 * Bit positions inside vmcb->control.clean. Each bit covers the group of
 * VMCB fields named in its comment; VMCB_DIRTY_MAX is the number of bits.
 */
enum {
	/* Intercept vectors, TSC offset, pause filter count */
	VMCB_INTERCEPTS,
	/* IOPM base and MSRPM base */
	VMCB_PERM_MAP,
	/* ASID */
	VMCB_ASID,
	/* int_ctl, int_vector */
	VMCB_INTR,
	/* npt_en, nCR3, gPAT */
	VMCB_NPT,
	/* CR0, CR3, CR4, EFER */
	VMCB_CR,
	/* DR6, DR7 */
	VMCB_DR,
	/* GDT, IDT */
	VMCB_DT,
	/* CS, DS, SS, ES, CPL */
	VMCB_SEG,
	/* CR2 only */
	VMCB_CR2,
	/* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
	VMCB_LBR,
	VMCB_DIRTY_MAX,
};
220 
221 /* TPR and CR2 are always written before VMRUN */
222 #define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))
223 
mark_all_dirty(struct vmcb * vmcb)224 static inline void mark_all_dirty(struct vmcb *vmcb)
225 {
226 	vmcb->control.clean = 0;
227 }
228 
mark_all_clean(struct vmcb * vmcb)229 static inline void mark_all_clean(struct vmcb *vmcb)
230 {
231 	vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
232 			       & ~VMCB_ALWAYS_DIRTY_MASK;
233 }
234 
mark_dirty(struct vmcb * vmcb,int bit)235 static inline void mark_dirty(struct vmcb *vmcb, int bit)
236 {
237 	vmcb->control.clean &= ~(1 << bit);
238 }
239 
to_svm(struct kvm_vcpu * vcpu)240 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
241 {
242 	return container_of(vcpu, struct vcpu_svm, vcpu);
243 }
244 
recalc_intercepts(struct vcpu_svm * svm)245 static void recalc_intercepts(struct vcpu_svm *svm)
246 {
247 	struct vmcb_control_area *c, *h;
248 	struct nested_state *g;
249 
250 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
251 
252 	if (!is_guest_mode(&svm->vcpu))
253 		return;
254 
255 	c = &svm->vmcb->control;
256 	h = &svm->nested.hsave->control;
257 	g = &svm->nested;
258 
259 	c->intercept_cr = h->intercept_cr | g->intercept_cr;
260 	c->intercept_dr = h->intercept_dr | g->intercept_dr;
261 	c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
262 	c->intercept = h->intercept | g->intercept;
263 }
264 
get_host_vmcb(struct vcpu_svm * svm)265 static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
266 {
267 	if (is_guest_mode(&svm->vcpu))
268 		return svm->nested.hsave;
269 	else
270 		return svm->vmcb;
271 }
272 
set_cr_intercept(struct vcpu_svm * svm,int bit)273 static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
274 {
275 	struct vmcb *vmcb = get_host_vmcb(svm);
276 
277 	vmcb->control.intercept_cr |= (1U << bit);
278 
279 	recalc_intercepts(svm);
280 }
281 
clr_cr_intercept(struct vcpu_svm * svm,int bit)282 static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
283 {
284 	struct vmcb *vmcb = get_host_vmcb(svm);
285 
286 	vmcb->control.intercept_cr &= ~(1U << bit);
287 
288 	recalc_intercepts(svm);
289 }
290 
is_cr_intercept(struct vcpu_svm * svm,int bit)291 static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
292 {
293 	struct vmcb *vmcb = get_host_vmcb(svm);
294 
295 	return vmcb->control.intercept_cr & (1U << bit);
296 }
297 
set_dr_intercept(struct vcpu_svm * svm,int bit)298 static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
299 {
300 	struct vmcb *vmcb = get_host_vmcb(svm);
301 
302 	vmcb->control.intercept_dr |= (1U << bit);
303 
304 	recalc_intercepts(svm);
305 }
306 
clr_dr_intercept(struct vcpu_svm * svm,int bit)307 static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
308 {
309 	struct vmcb *vmcb = get_host_vmcb(svm);
310 
311 	vmcb->control.intercept_dr &= ~(1U << bit);
312 
313 	recalc_intercepts(svm);
314 }
315 
set_exception_intercept(struct vcpu_svm * svm,int bit)316 static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
317 {
318 	struct vmcb *vmcb = get_host_vmcb(svm);
319 
320 	vmcb->control.intercept_exceptions |= (1U << bit);
321 
322 	recalc_intercepts(svm);
323 }
324 
clr_exception_intercept(struct vcpu_svm * svm,int bit)325 static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
326 {
327 	struct vmcb *vmcb = get_host_vmcb(svm);
328 
329 	vmcb->control.intercept_exceptions &= ~(1U << bit);
330 
331 	recalc_intercepts(svm);
332 }
333 
set_intercept(struct vcpu_svm * svm,int bit)334 static inline void set_intercept(struct vcpu_svm *svm, int bit)
335 {
336 	struct vmcb *vmcb = get_host_vmcb(svm);
337 
338 	vmcb->control.intercept |= (1ULL << bit);
339 
340 	recalc_intercepts(svm);
341 }
342 
clr_intercept(struct vcpu_svm * svm,int bit)343 static inline void clr_intercept(struct vcpu_svm *svm, int bit)
344 {
345 	struct vmcb *vmcb = get_host_vmcb(svm);
346 
347 	vmcb->control.intercept &= ~(1ULL << bit);
348 
349 	recalc_intercepts(svm);
350 }
351 
enable_gif(struct vcpu_svm * svm)352 static inline void enable_gif(struct vcpu_svm *svm)
353 {
354 	svm->vcpu.arch.hflags |= HF_GIF_MASK;
355 }
356 
disable_gif(struct vcpu_svm * svm)357 static inline void disable_gif(struct vcpu_svm *svm)
358 {
359 	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
360 }
361 
gif_set(struct vcpu_svm * svm)362 static inline bool gif_set(struct vcpu_svm *svm)
363 {
364 	return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
365 }
366 
367 static unsigned long iopm_base;
368 
369 struct kvm_ldttss_desc {
370 	u16 limit0;
371 	u16 base0;
372 	unsigned base1:8, type:5, dpl:2, p:1;
373 	unsigned limit1:4, zero0:3, g:1, base2:8;
374 	u32 base3;
375 	u32 zero1;
376 } __attribute__((packed));
377 
378 struct svm_cpu_data {
379 	int cpu;
380 
381 	u64 asid_generation;
382 	u32 max_asid;
383 	u32 next_asid;
384 	struct kvm_ldttss_desc *tss_desc;
385 
386 	struct page *save_area;
387 };
388 
389 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
390 
/* A CPU number paired with an integer result code. */
struct svm_init_data {
	int cpu;	/* CPU number */
	int r;		/* result code */
};
395 
396 static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
397 
398 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
399 #define MSRS_RANGE_SIZE 2048
400 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
401 
/*
 * Translate an MSR number into the u32 index of the permission-map word
 * that holds its bits.
 *
 * Each range in msrpm_ranges[] covers MSRS_IN_RANGE MSRs and occupies
 * MSRS_RANGE_SIZE bytes of the map, four MSRs per byte. Returns
 * MSR_INVALID when @msr lies in none of the ranges.
 */
static u32 svm_msrpm_offset(u32 msr)
{
	int range;

	for (range = 0; range < NUM_MSR_MAPS; range++) {
		u32 first = msrpm_ranges[range];
		u32 byte_off;

		if (msr >= first && msr < first + MSRS_IN_RANGE) {
			/* Byte inside this range, then skip earlier ranges */
			byte_off  = (msr - first) / 4;
			byte_off += (range * MSRS_RANGE_SIZE);

			/* Callers index an array of u32, not of u8 */
			return byte_off / 4;
		}
	}

	return MSR_INVALID;
}
422 
423 #define MAX_INST_SIZE 15
424 
clgi(void)425 static inline void clgi(void)
426 {
427 	asm volatile (__ex(SVM_CLGI));
428 }
429 
stgi(void)430 static inline void stgi(void)
431 {
432 	asm volatile (__ex(SVM_STGI));
433 }
434 
/*
 * Emit SVM_INVLPGA with @addr bound to the "a" register constraint and
 * @asid to the "c" register constraint, wrapped by __ex().
 */
static inline void invlpga(unsigned long addr, u32 asid)
{
	asm volatile (__ex(SVM_INVLPGA)
		      : /* no outputs */
		      : "a"(addr), "c"(asid));
}
439 
get_npt_level(void)440 static int get_npt_level(void)
441 {
442 #ifdef CONFIG_X86_64
443 	return PT64_ROOT_LEVEL;
444 #else
445 	return PT32E_ROOT_LEVEL;
446 #endif
447 }
448 
/*
 * Store @efer as the vcpu's architectural EFER and derive the value put
 * into the VMCB: EFER_SVME is always added, and without NPT EFER_LME is
 * stripped for as long as EFER_LMA is not set.
 */
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	struct vmcb *vmcb = to_svm(vcpu)->vmcb;
	u64 vmcb_efer = efer;

	vcpu->arch.efer = efer;

	if (!(npt_enabled || (efer & EFER_LMA)))
		vmcb_efer &= ~EFER_LME;

	vmcb->save.efer = vmcb_efer | EFER_SVME;
	mark_dirty(vmcb, VMCB_CR);
}
458 
/*
 * Return non-zero when @info has SVM_EVTINJ_VALID set and its type field
 * equals SVM_EVTINJ_TYPE_INTR.
 */
static int is_external_interrupt(u32 info)
{
	const u32 field_mask = SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
	const u32 wanted     = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;

	return (info & field_mask) == wanted;
}
464 
svm_get_interrupt_shadow(struct kvm_vcpu * vcpu,int mask)465 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
466 {
467 	struct vcpu_svm *svm = to_svm(vcpu);
468 	u32 ret = 0;
469 
470 	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
471 		ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
472 	return ret & mask;
473 }
474 
svm_set_interrupt_shadow(struct kvm_vcpu * vcpu,int mask)475 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
476 {
477 	struct vcpu_svm *svm = to_svm(vcpu);
478 
479 	if (mask == 0)
480 		svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
481 	else
482 		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
483 
484 }
485 
skip_emulated_instruction(struct kvm_vcpu * vcpu)486 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
487 {
488 	struct vcpu_svm *svm = to_svm(vcpu);
489 
490 	if (svm->vmcb->control.next_rip != 0)
491 		svm->next_rip = svm->vmcb->control.next_rip;
492 
493 	if (!svm->next_rip) {
494 		if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
495 				EMULATE_DONE)
496 			printk(KERN_DEBUG "%s: NOP\n", __func__);
497 		return;
498 	}
499 	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
500 		printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
501 		       __func__, kvm_rip_read(vcpu), svm->next_rip);
502 
503 	kvm_rip_write(vcpu, svm->next_rip);
504 	svm_set_interrupt_shadow(vcpu, 0);
505 }
506 
/*
 * Queue exception @nr for injection through the VMCB event_inj field.
 *
 * Unless this is a reinjection, nested_svm_check_exception() is consulted
 * first; a non-zero result means nothing is queued here. For #BP on CPUs
 * without NRIPS the RIP is moved past the INT3 and the resulting linear
 * RIP and instruction length are remembered in int3_rip/int3_injected.
 */
static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
				bool has_error_code, u32 error_code,
				bool reinject)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	unsigned event;

	if (!reinject) {
		if (nested_svm_check_exception(svm, nr, has_error_code,
					       error_code))
			return;
	}

	if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
		unsigned long rip_before = kvm_rip_read(&svm->vcpu);
		unsigned long rip_after;

		/*
		 * Guest debugging may have to reinject a #BP for a
		 * guest-owned INT3. Emulate nRIP by moving RIP forward.
		 * This fails if the injection raises a fault that is not
		 * intercepted, which is still better than failing always.
		 */
		skip_emulated_instruction(&svm->vcpu);
		rip_after = kvm_rip_read(&svm->vcpu);

		svm->int3_rip = rip_after + svm->vmcb->save.cs.base;
		svm->int3_injected = rip_after - rip_before;
	}

	event = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
	if (has_error_code)
		event |= SVM_EVTINJ_VALID_ERR;

	svm->vmcb->control.event_inj = event;
	svm->vmcb->control.event_inj_err = error_code;
}
543 
svm_init_erratum_383(void)544 static void svm_init_erratum_383(void)
545 {
546 	u32 low, high;
547 	int err;
548 	u64 val;
549 
550 	if (!cpu_has_amd_erratum(amd_erratum_383))
551 		return;
552 
553 	/* Use _safe variants to not break nested virtualization */
554 	val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
555 	if (err)
556 		return;
557 
558 	val |= (1ULL << 47);
559 
560 	low  = lower_32_bits(val);
561 	high = upper_32_bits(val);
562 
563 	native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
564 
565 	erratum_383_found = true;
566 }
567 
svm_init_osvw(struct kvm_vcpu * vcpu)568 static void svm_init_osvw(struct kvm_vcpu *vcpu)
569 {
570 	/*
571 	 * Guests should see errata 400 and 415 as fixed (assuming that
572 	 * HLT and IO instructions are intercepted).
573 	 */
574 	vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
575 	vcpu->arch.osvw.status = osvw_status & ~(6ULL);
576 
577 	/*
578 	 * By increasing VCPU's osvw.length to 3 we are telling the guest that
579 	 * all osvw.status bits inside that length, including bit 0 (which is
580 	 * reserved for erratum 298), are valid. However, if host processor's
581 	 * osvw_len is 0 then osvw_status[0] carries no information. We need to
582 	 * be conservative here and therefore we tell the guest that erratum 298
583 	 * is present (because we really don't know).
584 	 */
585 	if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
586 		vcpu->arch.osvw.status |= 1;
587 }
588 
has_svm(void)589 static int has_svm(void)
590 {
591 	const char *msg;
592 
593 	if (!cpu_has_svm(&msg)) {
594 		printk(KERN_INFO "has_svm: %s\n", msg);
595 		return 0;
596 	}
597 
598 	return 1;
599 }
600 
svm_hardware_disable(void * garbage)601 static void svm_hardware_disable(void *garbage)
602 {
603 	/* Make sure we clean up behind us */
604 	if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
605 		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
606 
607 	cpu_svm_disable();
608 
609 	amd_pmu_disable_virt();
610 }
611 
svm_hardware_enable(void * garbage)612 static int svm_hardware_enable(void *garbage)
613 {
614 
615 	struct svm_cpu_data *sd;
616 	uint64_t efer;
617 	struct desc_ptr gdt_descr;
618 	struct desc_struct *gdt;
619 	int me = raw_smp_processor_id();
620 
621 	rdmsrl(MSR_EFER, efer);
622 	if (efer & EFER_SVME)
623 		return -EBUSY;
624 
625 	if (!has_svm()) {
626 		printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
627 		       me);
628 		return -EINVAL;
629 	}
630 	sd = per_cpu(svm_data, me);
631 
632 	if (!sd) {
633 		printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
634 		       me);
635 		return -EINVAL;
636 	}
637 
638 	sd->asid_generation = 1;
639 	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
640 	sd->next_asid = sd->max_asid + 1;
641 
642 	native_store_gdt(&gdt_descr);
643 	gdt = (struct desc_struct *)gdt_descr.address;
644 	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
645 
646 	wrmsrl(MSR_EFER, efer | EFER_SVME);
647 
648 	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
649 
650 	if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
651 		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
652 		__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
653 	}
654 
655 
656 	/*
657 	 * Get OSVW bits.
658 	 *
659 	 * Note that it is possible to have a system with mixed processor
660 	 * revisions and therefore different OSVW bits. If bits are not the same
661 	 * on different processors then choose the worst case (i.e. if erratum
662 	 * is present on one processor and not on another then assume that the
663 	 * erratum is present everywhere).
664 	 */
665 	if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
666 		uint64_t len, status = 0;
667 		int err;
668 
669 		len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
670 		if (!err)
671 			status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
672 						      &err);
673 
674 		if (err)
675 			osvw_status = osvw_len = 0;
676 		else {
677 			if (len < osvw_len)
678 				osvw_len = len;
679 			osvw_status |= status;
680 			osvw_status &= (1ULL << osvw_len) - 1;
681 		}
682 	} else
683 		osvw_status = osvw_len = 0;
684 
685 	svm_init_erratum_383();
686 
687 	amd_pmu_enable_virt();
688 
689 	return 0;
690 }
691 
svm_cpu_uninit(int cpu)692 static void svm_cpu_uninit(int cpu)
693 {
694 	struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
695 
696 	if (!sd)
697 		return;
698 
699 	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
700 	__free_page(sd->save_area);
701 	kfree(sd);
702 }
703 
svm_cpu_init(int cpu)704 static int svm_cpu_init(int cpu)
705 {
706 	struct svm_cpu_data *sd;
707 	int r;
708 
709 	sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
710 	if (!sd)
711 		return -ENOMEM;
712 	sd->cpu = cpu;
713 	sd->save_area = alloc_page(GFP_KERNEL);
714 	r = -ENOMEM;
715 	if (!sd->save_area)
716 		goto err_1;
717 
718 	per_cpu(svm_data, cpu) = sd;
719 
720 	return 0;
721 
722 err_1:
723 	kfree(sd);
724 	return r;
725 
726 }
727 
valid_msr_intercept(u32 index)728 static bool valid_msr_intercept(u32 index)
729 {
730 	int i;
731 
732 	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
733 		if (direct_access_msrs[i].index == index)
734 			return true;
735 
736 	return false;
737 }
738 
/*
 * Update the two permission-map bits of @msr inside @msrpm.
 *
 * @msrpm: permission map to modify
 * @msr:   MSR number; it must fall into one of the ranges known to
 *         svm_msrpm_offset(), otherwise this BUGs
 * @read:  non-zero clears the MSR's read bit, zero sets it
 * @write: non-zero clears the MSR's write bit, zero sets it
 *
 * Callers pass 1 for the MSRs listed as direct-access, so a cleared bit
 * presumably means "not intercepted".
 */
static void set_msr_interception(u32 *msrpm, unsigned msr,
				 int read, int write)
{
	u8 bit_read, bit_write;
	unsigned long tmp;
	u32 offset;

	/*
	 * If this warning triggers extend the direct_access_msrs list at the
	 * beginning of the file
	 */
	WARN_ON(!valid_msr_intercept(msr));

	offset = svm_msrpm_offset(msr);

	/*
	 * Check the offset before it is used as an index: MSR_INVALID would
	 * otherwise be dereferenced far outside the map.
	 */
	BUG_ON(offset == MSR_INVALID);

	/* Two bits per MSR, sixteen MSRs per u32: read bit first, then write */
	bit_read  = 2 * (msr & 0x0f);
	bit_write = 2 * (msr & 0x0f) + 1;
	tmp       = msrpm[offset];

	read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);

	msrpm[offset] = tmp;
}
764 
/*
 * Initialize a permission map: set every bit, then clear the read and
 * write bits of each direct_access_msrs[] entry flagged "always".
 */
static void svm_vcpu_init_msrpm(u32 *msrpm)
{
	const struct svm_direct_access_msrs *entry = direct_access_msrs;

	memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));

	for (; entry->index != MSR_INVALID; entry++) {
		if (entry->always)
			set_msr_interception(msrpm, entry->index, 1, 1);
	}
}
778 
/*
 * Record @offset in msrpm_offsets[] unless it is already there. The first
 * slot still holding MSR_INVALID is used for a new entry.
 */
static void add_msr_offset(u32 offset)
{
	int slot;

	for (slot = 0; slot < MSRPM_OFFSETS; ++slot) {
		/* Already recorded: nothing to do */
		if (msrpm_offsets[slot] == offset)
			return;

		/* First free slot: record the offset here */
		if (msrpm_offsets[slot] == MSR_INVALID) {
			msrpm_offsets[slot] = offset;
			return;
		}
	}

	/*
	 * Reaching this point means the msrpm_offsets table overflowed.
	 * Just increase MSRPM_OFFSETS in this case.
	 */
	BUG();
}
805 
init_msrpm_offsets(void)806 static void init_msrpm_offsets(void)
807 {
808 	int i;
809 
810 	memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
811 
812 	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
813 		u32 offset;
814 
815 		offset = svm_msrpm_offset(direct_access_msrs[i].index);
816 		BUG_ON(offset == MSR_INVALID);
817 
818 		add_msr_offset(offset);
819 	}
820 }
821 
svm_enable_lbrv(struct vcpu_svm * svm)822 static void svm_enable_lbrv(struct vcpu_svm *svm)
823 {
824 	u32 *msrpm = svm->msrpm;
825 
826 	svm->vmcb->control.lbr_ctl = 1;
827 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
828 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
829 	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
830 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
831 }
832 
svm_disable_lbrv(struct vcpu_svm * svm)833 static void svm_disable_lbrv(struct vcpu_svm *svm)
834 {
835 	u32 *msrpm = svm->msrpm;
836 
837 	svm->vmcb->control.lbr_ctl = 0;
838 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
839 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
840 	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
841 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
842 }
843 
svm_hardware_setup(void)844 static __init int svm_hardware_setup(void)
845 {
846 	int cpu;
847 	struct page *iopm_pages;
848 	void *iopm_va;
849 	int r;
850 
851 	iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
852 
853 	if (!iopm_pages)
854 		return -ENOMEM;
855 
856 	iopm_va = page_address(iopm_pages);
857 	memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
858 	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
859 
860 	init_msrpm_offsets();
861 
862 	if (boot_cpu_has(X86_FEATURE_NX))
863 		kvm_enable_efer_bits(EFER_NX);
864 
865 	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
866 		kvm_enable_efer_bits(EFER_FFXSR);
867 
868 	if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
869 		u64 max;
870 
871 		kvm_has_tsc_control = true;
872 
873 		/*
874 		 * Make sure the user can only configure tsc_khz values that
875 		 * fit into a signed integer.
876 		 * A min value is not calculated needed because it will always
877 		 * be 1 on all machines and a value of 0 is used to disable
878 		 * tsc-scaling for the vcpu.
879 		 */
880 		max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
881 
882 		kvm_max_guest_tsc_khz = max;
883 	}
884 
885 	if (nested) {
886 		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
887 		kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
888 	}
889 
890 	for_each_possible_cpu(cpu) {
891 		r = svm_cpu_init(cpu);
892 		if (r)
893 			goto err;
894 	}
895 
896 	if (!boot_cpu_has(X86_FEATURE_NPT))
897 		npt_enabled = false;
898 
899 	if (npt_enabled && !npt) {
900 		printk(KERN_INFO "kvm: Nested Paging disabled\n");
901 		npt_enabled = false;
902 	}
903 
904 	if (npt_enabled) {
905 		printk(KERN_INFO "kvm: Nested Paging enabled\n");
906 		kvm_enable_tdp();
907 	} else
908 		kvm_disable_tdp();
909 
910 	return 0;
911 
912 err:
913 	__free_pages(iopm_pages, IOPM_ALLOC_ORDER);
914 	iopm_base = 0;
915 	return r;
916 }
917 
svm_hardware_unsetup(void)918 static __exit void svm_hardware_unsetup(void)
919 {
920 	int cpu;
921 
922 	for_each_possible_cpu(cpu)
923 		svm_cpu_uninit(cpu);
924 
925 	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
926 	iopm_base = 0;
927 }
928 
init_seg(struct vmcb_seg * seg)929 static void init_seg(struct vmcb_seg *seg)
930 {
931 	seg->selector = 0;
932 	seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
933 		      SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
934 	seg->limit = 0xffff;
935 	seg->base = 0;
936 }
937 
/*
 * Reset @seg to a present system segment of the given @type with
 * selector 0, base 0 and a limit of 0xffff.
 */
static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
{
	seg->base = 0;
	seg->limit = 0xffff;
	seg->selector = 0;
	seg->attrib = SVM_SELECTOR_P_MASK | type;
}
945 
/*
 * Multiply @tsc by @ratio, where @ratio is a fixed-point number with the
 * integer part above bit 31 and a 32-bit fraction below. The product is
 * assembled from partial products so the fractional part never needs
 * more than 64 bits.
 */
static u64 __scale_tsc(u64 ratio, u64 tsc)
{
	const u64 low32 = (1ULL << 32) - 1;
	u64 integer  = ratio >> 32;
	u64 fraction = ratio & low32;
	u64 scaled;

	scaled  = tsc * integer;
	scaled += (tsc >> 32) * fraction;
	scaled += ((tsc & low32) * fraction) >> 32;

	return scaled;
}
960 
/*
 * Scale @tsc by the vcpu's tsc_ratio. A ratio of TSC_RATIO_DEFAULT
 * returns @tsc unchanged.
 */
static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->tsc_ratio == TSC_RATIO_DEFAULT)
		return tsc;

	return __scale_tsc(svm->tsc_ratio, tsc);
}
971 
/*
 * Configure the guest TSC frequency for @vcpu.
 *
 * @user_tsc_khz: requested guest TSC rate in kHz
 * @scale:        false when the guest runs at the host TSC rate
 *
 * Without scaling the ratio is reset to TSC_RATIO_DEFAULT. If the CPU has
 * no TSC ratio MSR, a faster-than-host request switches the vcpu to
 * catch-up mode and anything else only triggers a warning. Otherwise the
 * ratio user_tsc_khz / tsc_khz is computed with 32 fractional bits and
 * stored, unless it is zero or touches reserved bits.
 */
static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 ratio;

	/* Guest TSC same frequency as host TSC? */
	if (!scale) {
		svm->tsc_ratio = TSC_RATIO_DEFAULT;
		return;
	}

	/* No hardware TSC scaling available */
	if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
		if (user_tsc_khz <= tsc_khz) {
			WARN(1, "user requested TSC rate below hardware speed\n");
			return;
		}

		vcpu->arch.tsc_catchup = 1;
		vcpu->arch.tsc_always_catchup = 1;
		return;
	}

	/* TSC scaling required - calculate ratio */
	ratio = (u64)user_tsc_khz << 32;
	do_div(ratio, tsc_khz);

	if (ratio == 0 || (ratio & TSC_RATIO_RSVD)) {
		WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
				user_tsc_khz);
		return;
	}

	svm->tsc_ratio = ratio;
}
1007 
/*
 * Install @offset as the TSC offset.
 *
 * In guest mode @offset goes into the hsave VMCB, and the active VMCB
 * keeps the difference it previously had relative to hsave on top of the
 * new value. The intercept group of the active VMCB is marked dirty.
 */
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 nested_delta = 0;

	if (is_guest_mode(vcpu)) {
		struct vmcb *hsave = svm->nested.hsave;

		nested_delta = svm->vmcb->control.tsc_offset -
			       hsave->control.tsc_offset;
		hsave->control.tsc_offset = offset;
	}

	svm->vmcb->control.tsc_offset = offset + nested_delta;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
1023 
/*
 * Add @adjustment to the TSC offset of the active VMCB and, in guest mode,
 * of the hsave VMCB as well. When @host is true the adjustment is first
 * run through svm_scale_tsc(). Negative adjustments trigger a warning but
 * are still applied.
 */
static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	WARN_ON(adjustment < 0);

	if (host)
		adjustment = svm_scale_tsc(vcpu, adjustment);

	if (is_guest_mode(vcpu))
		svm->nested.hsave->control.tsc_offset += adjustment;
	svm->vmcb->control.tsc_offset += adjustment;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
1037 
/*
 * Return the offset that, added to the scaled value of the TSC read right
 * now, yields @target_tsc.
 */
static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
	return target_tsc - svm_scale_tsc(vcpu, native_read_tsc());
}
1046 
init_vmcb(struct vcpu_svm * svm)1047 static void init_vmcb(struct vcpu_svm *svm)
1048 {
1049 	struct vmcb_control_area *control = &svm->vmcb->control;
1050 	struct vmcb_save_area *save = &svm->vmcb->save;
1051 
1052 	svm->vcpu.fpu_active = 1;
1053 	svm->vcpu.arch.hflags = 0;
1054 
1055 	set_cr_intercept(svm, INTERCEPT_CR0_READ);
1056 	set_cr_intercept(svm, INTERCEPT_CR3_READ);
1057 	set_cr_intercept(svm, INTERCEPT_CR4_READ);
1058 	set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1059 	set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1060 	set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
1061 	set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
1062 
1063 	set_dr_intercept(svm, INTERCEPT_DR0_READ);
1064 	set_dr_intercept(svm, INTERCEPT_DR1_READ);
1065 	set_dr_intercept(svm, INTERCEPT_DR2_READ);
1066 	set_dr_intercept(svm, INTERCEPT_DR3_READ);
1067 	set_dr_intercept(svm, INTERCEPT_DR4_READ);
1068 	set_dr_intercept(svm, INTERCEPT_DR5_READ);
1069 	set_dr_intercept(svm, INTERCEPT_DR6_READ);
1070 	set_dr_intercept(svm, INTERCEPT_DR7_READ);
1071 
1072 	set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
1073 	set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
1074 	set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
1075 	set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
1076 	set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
1077 	set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
1078 	set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
1079 	set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
1080 
1081 	set_exception_intercept(svm, PF_VECTOR);
1082 	set_exception_intercept(svm, UD_VECTOR);
1083 	set_exception_intercept(svm, MC_VECTOR);
1084 
1085 	set_intercept(svm, INTERCEPT_INTR);
1086 	set_intercept(svm, INTERCEPT_NMI);
1087 	set_intercept(svm, INTERCEPT_SMI);
1088 	set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1089 	set_intercept(svm, INTERCEPT_RDPMC);
1090 	set_intercept(svm, INTERCEPT_CPUID);
1091 	set_intercept(svm, INTERCEPT_INVD);
1092 	set_intercept(svm, INTERCEPT_HLT);
1093 	set_intercept(svm, INTERCEPT_INVLPG);
1094 	set_intercept(svm, INTERCEPT_INVLPGA);
1095 	set_intercept(svm, INTERCEPT_IOIO_PROT);
1096 	set_intercept(svm, INTERCEPT_MSR_PROT);
1097 	set_intercept(svm, INTERCEPT_TASK_SWITCH);
1098 	set_intercept(svm, INTERCEPT_SHUTDOWN);
1099 	set_intercept(svm, INTERCEPT_VMRUN);
1100 	set_intercept(svm, INTERCEPT_VMMCALL);
1101 	set_intercept(svm, INTERCEPT_VMLOAD);
1102 	set_intercept(svm, INTERCEPT_VMSAVE);
1103 	set_intercept(svm, INTERCEPT_STGI);
1104 	set_intercept(svm, INTERCEPT_CLGI);
1105 	set_intercept(svm, INTERCEPT_SKINIT);
1106 	set_intercept(svm, INTERCEPT_WBINVD);
1107 	set_intercept(svm, INTERCEPT_MONITOR);
1108 	set_intercept(svm, INTERCEPT_MWAIT);
1109 	set_intercept(svm, INTERCEPT_XSETBV);
1110 
1111 	control->iopm_base_pa = iopm_base;
1112 	control->msrpm_base_pa = __pa(svm->msrpm);
1113 	control->int_ctl = V_INTR_MASKING_MASK;
1114 
1115 	init_seg(&save->es);
1116 	init_seg(&save->ss);
1117 	init_seg(&save->ds);
1118 	init_seg(&save->fs);
1119 	init_seg(&save->gs);
1120 
1121 	save->cs.selector = 0xf000;
1122 	/* Executable/Readable Code Segment */
1123 	save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1124 		SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1125 	save->cs.limit = 0xffff;
1126 	/*
1127 	 * cs.base should really be 0xffff0000, but vmx can't handle that, so
1128 	 * be consistent with it.
1129 	 *
1130 	 * Replace when we have real mode working for vmx.
1131 	 */
1132 	save->cs.base = 0xf0000;
1133 
1134 	save->gdtr.limit = 0xffff;
1135 	save->idtr.limit = 0xffff;
1136 
1137 	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1138 	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1139 
1140 	svm_set_efer(&svm->vcpu, 0);
1141 	save->dr6 = 0xffff0ff0;
1142 	save->dr7 = 0x400;
1143 	kvm_set_rflags(&svm->vcpu, 2);
1144 	save->rip = 0x0000fff0;
1145 	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
1146 
1147 	/*
1148 	 * This is the guest-visible cr0 value.
1149 	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
1150 	 */
1151 	svm->vcpu.arch.cr0 = 0;
1152 	(void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1153 
1154 	save->cr4 = X86_CR4_PAE;
1155 	/* rdx = ?? */
1156 
1157 	if (npt_enabled) {
1158 		/* Setup VMCB for Nested Paging */
1159 		control->nested_ctl = 1;
1160 		clr_intercept(svm, INTERCEPT_INVLPG);
1161 		clr_exception_intercept(svm, PF_VECTOR);
1162 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1163 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1164 		save->g_pat = 0x0007040600070406ULL;
1165 		save->cr3 = 0;
1166 		save->cr4 = 0;
1167 	}
1168 	svm->asid_generation = 0;
1169 
1170 	svm->nested.vmcb = 0;
1171 	svm->vcpu.arch.hflags = 0;
1172 
1173 	if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
1174 		control->pause_filter_count = 3000;
1175 		set_intercept(svm, INTERCEPT_PAUSE);
1176 	}
1177 
1178 	mark_all_dirty(svm->vmcb);
1179 
1180 	enable_gif(svm);
1181 }
1182 
svm_vcpu_reset(struct kvm_vcpu * vcpu)1183 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
1184 {
1185 	struct vcpu_svm *svm = to_svm(vcpu);
1186 
1187 	init_vmcb(svm);
1188 
1189 	if (!kvm_vcpu_is_bsp(vcpu)) {
1190 		kvm_rip_write(vcpu, 0);
1191 		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
1192 		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
1193 	}
1194 	vcpu->arch.regs_avail = ~0;
1195 	vcpu->arch.regs_dirty = ~0;
1196 
1197 	return 0;
1198 }
1199 
svm_create_vcpu(struct kvm * kvm,unsigned int id)1200 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1201 {
1202 	struct vcpu_svm *svm;
1203 	struct page *page;
1204 	struct page *msrpm_pages;
1205 	struct page *hsave_page;
1206 	struct page *nested_msrpm_pages;
1207 	int err;
1208 
1209 	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1210 	if (!svm) {
1211 		err = -ENOMEM;
1212 		goto out;
1213 	}
1214 
1215 	svm->tsc_ratio = TSC_RATIO_DEFAULT;
1216 
1217 	err = kvm_vcpu_init(&svm->vcpu, kvm, id);
1218 	if (err)
1219 		goto free_svm;
1220 
1221 	err = -ENOMEM;
1222 	page = alloc_page(GFP_KERNEL);
1223 	if (!page)
1224 		goto uninit;
1225 
1226 	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1227 	if (!msrpm_pages)
1228 		goto free_page1;
1229 
1230 	nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1231 	if (!nested_msrpm_pages)
1232 		goto free_page2;
1233 
1234 	hsave_page = alloc_page(GFP_KERNEL);
1235 	if (!hsave_page)
1236 		goto free_page3;
1237 
1238 	svm->nested.hsave = page_address(hsave_page);
1239 
1240 	svm->msrpm = page_address(msrpm_pages);
1241 	svm_vcpu_init_msrpm(svm->msrpm);
1242 
1243 	svm->nested.msrpm = page_address(nested_msrpm_pages);
1244 	svm_vcpu_init_msrpm(svm->nested.msrpm);
1245 
1246 	svm->vmcb = page_address(page);
1247 	clear_page(svm->vmcb);
1248 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
1249 	svm->asid_generation = 0;
1250 	init_vmcb(svm);
1251 	kvm_write_tsc(&svm->vcpu, 0);
1252 
1253 	err = fx_init(&svm->vcpu);
1254 	if (err)
1255 		goto free_page4;
1256 
1257 	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1258 	if (kvm_vcpu_is_bsp(&svm->vcpu))
1259 		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1260 
1261 	svm_init_osvw(&svm->vcpu);
1262 
1263 	return &svm->vcpu;
1264 
1265 free_page4:
1266 	__free_page(hsave_page);
1267 free_page3:
1268 	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1269 free_page2:
1270 	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
1271 free_page1:
1272 	__free_page(page);
1273 uninit:
1274 	kvm_vcpu_uninit(&svm->vcpu);
1275 free_svm:
1276 	kmem_cache_free(kvm_vcpu_cache, svm);
1277 out:
1278 	return ERR_PTR(err);
1279 }
1280 
svm_free_vcpu(struct kvm_vcpu * vcpu)1281 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1282 {
1283 	struct vcpu_svm *svm = to_svm(vcpu);
1284 
1285 	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
1286 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
1287 	__free_page(virt_to_page(svm->nested.hsave));
1288 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
1289 	kvm_vcpu_uninit(vcpu);
1290 	kmem_cache_free(kvm_vcpu_cache, svm);
1291 }
1292 
svm_vcpu_load(struct kvm_vcpu * vcpu,int cpu)1293 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1294 {
1295 	struct vcpu_svm *svm = to_svm(vcpu);
1296 	int i;
1297 
1298 	if (unlikely(cpu != vcpu->cpu)) {
1299 		svm->asid_generation = 0;
1300 		mark_all_dirty(svm->vmcb);
1301 	}
1302 
1303 #ifdef CONFIG_X86_64
1304 	rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
1305 #endif
1306 	savesegment(fs, svm->host.fs);
1307 	savesegment(gs, svm->host.gs);
1308 	svm->host.ldt = kvm_read_ldt();
1309 
1310 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1311 		rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1312 
1313 	if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
1314 	    svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) {
1315 		__get_cpu_var(current_tsc_ratio) = svm->tsc_ratio;
1316 		wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
1317 	}
1318 }
1319 
svm_vcpu_put(struct kvm_vcpu * vcpu)1320 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1321 {
1322 	struct vcpu_svm *svm = to_svm(vcpu);
1323 	int i;
1324 
1325 	++vcpu->stat.host_state_reload;
1326 	kvm_load_ldt(svm->host.ldt);
1327 #ifdef CONFIG_X86_64
1328 	loadsegment(fs, svm->host.fs);
1329 	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
1330 	load_gs_index(svm->host.gs);
1331 #else
1332 #ifdef CONFIG_X86_32_LAZY_GS
1333 	loadsegment(gs, svm->host.gs);
1334 #endif
1335 #endif
1336 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1337 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1338 }
1339 
svm_update_cpl(struct kvm_vcpu * vcpu)1340 static void svm_update_cpl(struct kvm_vcpu *vcpu)
1341 {
1342 	struct vcpu_svm *svm = to_svm(vcpu);
1343 	int cpl;
1344 
1345 	if (!is_protmode(vcpu))
1346 		cpl = 0;
1347 	else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
1348 		cpl = 3;
1349 	else
1350 		cpl = svm->vmcb->save.cs.selector & 0x3;
1351 
1352 	svm->vmcb->save.cpl = cpl;
1353 }
1354 
svm_get_rflags(struct kvm_vcpu * vcpu)1355 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1356 {
1357 	return to_svm(vcpu)->vmcb->save.rflags;
1358 }
1359 
svm_set_rflags(struct kvm_vcpu * vcpu,unsigned long rflags)1360 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1361 {
1362 	unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags;
1363 
1364 	to_svm(vcpu)->vmcb->save.rflags = rflags;
1365 	if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
1366 		svm_update_cpl(vcpu);
1367 }
1368 
svm_cache_reg(struct kvm_vcpu * vcpu,enum kvm_reg reg)1369 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1370 {
1371 	switch (reg) {
1372 	case VCPU_EXREG_PDPTR:
1373 		BUG_ON(!npt_enabled);
1374 		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
1375 		break;
1376 	default:
1377 		BUG();
1378 	}
1379 }
1380 
svm_set_vintr(struct vcpu_svm * svm)1381 static void svm_set_vintr(struct vcpu_svm *svm)
1382 {
1383 	set_intercept(svm, INTERCEPT_VINTR);
1384 }
1385 
svm_clear_vintr(struct vcpu_svm * svm)1386 static void svm_clear_vintr(struct vcpu_svm *svm)
1387 {
1388 	clr_intercept(svm, INTERCEPT_VINTR);
1389 }
1390 
svm_seg(struct kvm_vcpu * vcpu,int seg)1391 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1392 {
1393 	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1394 
1395 	switch (seg) {
1396 	case VCPU_SREG_CS: return &save->cs;
1397 	case VCPU_SREG_DS: return &save->ds;
1398 	case VCPU_SREG_ES: return &save->es;
1399 	case VCPU_SREG_FS: return &save->fs;
1400 	case VCPU_SREG_GS: return &save->gs;
1401 	case VCPU_SREG_SS: return &save->ss;
1402 	case VCPU_SREG_TR: return &save->tr;
1403 	case VCPU_SREG_LDTR: return &save->ldtr;
1404 	}
1405 	BUG();
1406 	return NULL;
1407 }
1408 
svm_get_segment_base(struct kvm_vcpu * vcpu,int seg)1409 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1410 {
1411 	struct vmcb_seg *s = svm_seg(vcpu, seg);
1412 
1413 	return s->base;
1414 }
1415 
svm_get_segment(struct kvm_vcpu * vcpu,struct kvm_segment * var,int seg)1416 static void svm_get_segment(struct kvm_vcpu *vcpu,
1417 			    struct kvm_segment *var, int seg)
1418 {
1419 	struct vmcb_seg *s = svm_seg(vcpu, seg);
1420 
1421 	var->base = s->base;
1422 	var->limit = s->limit;
1423 	var->selector = s->selector;
1424 	var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1425 	var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1426 	var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1427 	var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1428 	var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1429 	var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1430 	var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
1431 	var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
1432 
1433 	/*
1434 	 * AMD's VMCB does not have an explicit unusable field, so emulate it
1435 	 * for cross vendor migration purposes by "not present"
1436 	 */
1437 	var->unusable = !var->present || (var->type == 0);
1438 
1439 	switch (seg) {
1440 	case VCPU_SREG_CS:
1441 		/*
1442 		 * SVM always stores 0 for the 'G' bit in the CS selector in
1443 		 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
1444 		 * Intel's VMENTRY has a check on the 'G' bit.
1445 		 */
1446 		var->g = s->limit > 0xfffff;
1447 		break;
1448 	case VCPU_SREG_TR:
1449 		/*
1450 		 * Work around a bug where the busy flag in the tr selector
1451 		 * isn't exposed
1452 		 */
1453 		var->type |= 0x2;
1454 		break;
1455 	case VCPU_SREG_DS:
1456 	case VCPU_SREG_ES:
1457 	case VCPU_SREG_FS:
1458 	case VCPU_SREG_GS:
1459 		/*
1460 		 * The accessed bit must always be set in the segment
1461 		 * descriptor cache, although it can be cleared in the
1462 		 * descriptor, the cached bit always remains at 1. Since
1463 		 * Intel has a check on this, set it here to support
1464 		 * cross-vendor migration.
1465 		 */
1466 		if (!var->unusable)
1467 			var->type |= 0x1;
1468 		break;
1469 	case VCPU_SREG_SS:
1470 		/*
1471 		 * On AMD CPUs sometimes the DB bit in the segment
1472 		 * descriptor is left as 1, although the whole segment has
1473 		 * been made unusable. Clear it here to pass an Intel VMX
1474 		 * entry check when cross vendor migrating.
1475 		 */
1476 		if (var->unusable)
1477 			var->db = 0;
1478 		break;
1479 	}
1480 }
1481 
svm_get_cpl(struct kvm_vcpu * vcpu)1482 static int svm_get_cpl(struct kvm_vcpu *vcpu)
1483 {
1484 	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1485 
1486 	return save->cpl;
1487 }
1488 
svm_get_idt(struct kvm_vcpu * vcpu,struct desc_ptr * dt)1489 static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1490 {
1491 	struct vcpu_svm *svm = to_svm(vcpu);
1492 
1493 	dt->size = svm->vmcb->save.idtr.limit;
1494 	dt->address = svm->vmcb->save.idtr.base;
1495 }
1496 
svm_set_idt(struct kvm_vcpu * vcpu,struct desc_ptr * dt)1497 static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1498 {
1499 	struct vcpu_svm *svm = to_svm(vcpu);
1500 
1501 	svm->vmcb->save.idtr.limit = dt->size;
1502 	svm->vmcb->save.idtr.base = dt->address ;
1503 	mark_dirty(svm->vmcb, VMCB_DT);
1504 }
1505 
svm_get_gdt(struct kvm_vcpu * vcpu,struct desc_ptr * dt)1506 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1507 {
1508 	struct vcpu_svm *svm = to_svm(vcpu);
1509 
1510 	dt->size = svm->vmcb->save.gdtr.limit;
1511 	dt->address = svm->vmcb->save.gdtr.base;
1512 }
1513 
svm_set_gdt(struct kvm_vcpu * vcpu,struct desc_ptr * dt)1514 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1515 {
1516 	struct vcpu_svm *svm = to_svm(vcpu);
1517 
1518 	svm->vmcb->save.gdtr.limit = dt->size;
1519 	svm->vmcb->save.gdtr.base = dt->address ;
1520 	mark_dirty(svm->vmcb, VMCB_DT);
1521 }
1522 
/* No-op in this implementation: nothing is done for the CR0 guest bits. */
static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
}
1526 
/* No-op in this implementation: nothing is done for CR3. */
static void svm_decache_cr3(struct kvm_vcpu *vcpu)
{
}
1530 
/* No-op in this implementation: nothing is done for the CR4 guest bits. */
static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
}
1534 
update_cr0_intercept(struct vcpu_svm * svm)1535 static void update_cr0_intercept(struct vcpu_svm *svm)
1536 {
1537 	ulong gcr0 = svm->vcpu.arch.cr0;
1538 	u64 *hcr0 = &svm->vmcb->save.cr0;
1539 
1540 	if (!svm->vcpu.fpu_active)
1541 		*hcr0 |= SVM_CR0_SELECTIVE_MASK;
1542 	else
1543 		*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1544 			| (gcr0 & SVM_CR0_SELECTIVE_MASK);
1545 
1546 	mark_dirty(svm->vmcb, VMCB_CR);
1547 
1548 	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
1549 		clr_cr_intercept(svm, INTERCEPT_CR0_READ);
1550 		clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1551 	} else {
1552 		set_cr_intercept(svm, INTERCEPT_CR0_READ);
1553 		set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1554 	}
1555 }
1556 
svm_set_cr0(struct kvm_vcpu * vcpu,unsigned long cr0)1557 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1558 {
1559 	struct vcpu_svm *svm = to_svm(vcpu);
1560 
1561 #ifdef CONFIG_X86_64
1562 	if (vcpu->arch.efer & EFER_LME) {
1563 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1564 			vcpu->arch.efer |= EFER_LMA;
1565 			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1566 		}
1567 
1568 		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1569 			vcpu->arch.efer &= ~EFER_LMA;
1570 			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1571 		}
1572 	}
1573 #endif
1574 	vcpu->arch.cr0 = cr0;
1575 
1576 	if (!npt_enabled)
1577 		cr0 |= X86_CR0_PG | X86_CR0_WP;
1578 
1579 	if (!vcpu->fpu_active)
1580 		cr0 |= X86_CR0_TS;
1581 	/*
1582 	 * re-enable caching here because the QEMU bios
1583 	 * does not do it - this results in some delay at
1584 	 * reboot
1585 	 */
1586 	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1587 	svm->vmcb->save.cr0 = cr0;
1588 	mark_dirty(svm->vmcb, VMCB_CR);
1589 	update_cr0_intercept(svm);
1590 }
1591 
svm_set_cr4(struct kvm_vcpu * vcpu,unsigned long cr4)1592 static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1593 {
1594 	unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
1595 	unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1596 
1597 	if (cr4 & X86_CR4_VMXE)
1598 		return 1;
1599 
1600 	if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1601 		svm_flush_tlb(vcpu);
1602 
1603 	vcpu->arch.cr4 = cr4;
1604 	if (!npt_enabled)
1605 		cr4 |= X86_CR4_PAE;
1606 	cr4 |= host_cr4_mce;
1607 	to_svm(vcpu)->vmcb->save.cr4 = cr4;
1608 	mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
1609 	return 0;
1610 }
1611 
svm_set_segment(struct kvm_vcpu * vcpu,struct kvm_segment * var,int seg)1612 static void svm_set_segment(struct kvm_vcpu *vcpu,
1613 			    struct kvm_segment *var, int seg)
1614 {
1615 	struct vcpu_svm *svm = to_svm(vcpu);
1616 	struct vmcb_seg *s = svm_seg(vcpu, seg);
1617 
1618 	s->base = var->base;
1619 	s->limit = var->limit;
1620 	s->selector = var->selector;
1621 	if (var->unusable)
1622 		s->attrib = 0;
1623 	else {
1624 		s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1625 		s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1626 		s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1627 		s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
1628 		s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1629 		s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1630 		s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1631 		s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1632 	}
1633 	if (seg == VCPU_SREG_CS)
1634 		svm_update_cpl(vcpu);
1635 
1636 	mark_dirty(svm->vmcb, VMCB_SEG);
1637 }
1638 
update_db_intercept(struct kvm_vcpu * vcpu)1639 static void update_db_intercept(struct kvm_vcpu *vcpu)
1640 {
1641 	struct vcpu_svm *svm = to_svm(vcpu);
1642 
1643 	clr_exception_intercept(svm, DB_VECTOR);
1644 	clr_exception_intercept(svm, BP_VECTOR);
1645 
1646 	if (svm->nmi_singlestep)
1647 		set_exception_intercept(svm, DB_VECTOR);
1648 
1649 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1650 		if (vcpu->guest_debug &
1651 		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
1652 			set_exception_intercept(svm, DB_VECTOR);
1653 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1654 			set_exception_intercept(svm, BP_VECTOR);
1655 	} else
1656 		vcpu->guest_debug = 0;
1657 }
1658 
svm_guest_debug(struct kvm_vcpu * vcpu,struct kvm_guest_debug * dbg)1659 static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1660 {
1661 	struct vcpu_svm *svm = to_svm(vcpu);
1662 
1663 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1664 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
1665 	else
1666 		svm->vmcb->save.dr7 = vcpu->arch.dr7;
1667 
1668 	mark_dirty(svm->vmcb, VMCB_DR);
1669 
1670 	update_db_intercept(vcpu);
1671 }
1672 
new_asid(struct vcpu_svm * svm,struct svm_cpu_data * sd)1673 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1674 {
1675 	if (sd->next_asid > sd->max_asid) {
1676 		++sd->asid_generation;
1677 		sd->next_asid = 1;
1678 		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1679 	}
1680 
1681 	svm->asid_generation = sd->asid_generation;
1682 	svm->vmcb->control.asid = sd->next_asid++;
1683 
1684 	mark_dirty(svm->vmcb, VMCB_ASID);
1685 }
1686 
svm_set_dr7(struct kvm_vcpu * vcpu,unsigned long value)1687 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1688 {
1689 	struct vcpu_svm *svm = to_svm(vcpu);
1690 
1691 	svm->vmcb->save.dr7 = value;
1692 	mark_dirty(svm->vmcb, VMCB_DR);
1693 }
1694 
pf_interception(struct vcpu_svm * svm)1695 static int pf_interception(struct vcpu_svm *svm)
1696 {
1697 	u64 fault_address = svm->vmcb->control.exit_info_2;
1698 	u32 error_code;
1699 	int r = 1;
1700 
1701 	switch (svm->apf_reason) {
1702 	default:
1703 		error_code = svm->vmcb->control.exit_info_1;
1704 
1705 		trace_kvm_page_fault(fault_address, error_code);
1706 		if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
1707 			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1708 		r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
1709 			svm->vmcb->control.insn_bytes,
1710 			svm->vmcb->control.insn_len);
1711 		break;
1712 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
1713 		svm->apf_reason = 0;
1714 		local_irq_disable();
1715 		kvm_async_pf_task_wait(fault_address);
1716 		local_irq_enable();
1717 		break;
1718 	case KVM_PV_REASON_PAGE_READY:
1719 		svm->apf_reason = 0;
1720 		local_irq_disable();
1721 		kvm_async_pf_task_wake(fault_address);
1722 		local_irq_enable();
1723 		break;
1724 	}
1725 	return r;
1726 }
1727 
db_interception(struct vcpu_svm * svm)1728 static int db_interception(struct vcpu_svm *svm)
1729 {
1730 	struct kvm_run *kvm_run = svm->vcpu.run;
1731 
1732 	if (!(svm->vcpu.guest_debug &
1733 	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1734 		!svm->nmi_singlestep) {
1735 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1736 		return 1;
1737 	}
1738 
1739 	if (svm->nmi_singlestep) {
1740 		svm->nmi_singlestep = false;
1741 		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
1742 			svm->vmcb->save.rflags &=
1743 				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1744 		update_db_intercept(&svm->vcpu);
1745 	}
1746 
1747 	if (svm->vcpu.guest_debug &
1748 	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1749 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
1750 		kvm_run->debug.arch.pc =
1751 			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1752 		kvm_run->debug.arch.exception = DB_VECTOR;
1753 		return 0;
1754 	}
1755 
1756 	return 1;
1757 }
1758 
bp_interception(struct vcpu_svm * svm)1759 static int bp_interception(struct vcpu_svm *svm)
1760 {
1761 	struct kvm_run *kvm_run = svm->vcpu.run;
1762 
1763 	kvm_run->exit_reason = KVM_EXIT_DEBUG;
1764 	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1765 	kvm_run->debug.arch.exception = BP_VECTOR;
1766 	return 0;
1767 }
1768 
ud_interception(struct vcpu_svm * svm)1769 static int ud_interception(struct vcpu_svm *svm)
1770 {
1771 	int er;
1772 
1773 	er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
1774 	if (er != EMULATE_DONE)
1775 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1776 	return 1;
1777 }
1778 
svm_fpu_activate(struct kvm_vcpu * vcpu)1779 static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1780 {
1781 	struct vcpu_svm *svm = to_svm(vcpu);
1782 
1783 	clr_exception_intercept(svm, NM_VECTOR);
1784 
1785 	svm->vcpu.fpu_active = 1;
1786 	update_cr0_intercept(svm);
1787 }
1788 
nm_interception(struct vcpu_svm * svm)1789 static int nm_interception(struct vcpu_svm *svm)
1790 {
1791 	svm_fpu_activate(&svm->vcpu);
1792 	return 1;
1793 }
1794 
is_erratum_383(void)1795 static bool is_erratum_383(void)
1796 {
1797 	int err, i;
1798 	u64 value;
1799 
1800 	if (!erratum_383_found)
1801 		return false;
1802 
1803 	value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
1804 	if (err)
1805 		return false;
1806 
1807 	/* Bit 62 may or may not be set for this mce */
1808 	value &= ~(1ULL << 62);
1809 
1810 	if (value != 0xb600000000010015ULL)
1811 		return false;
1812 
1813 	/* Clear MCi_STATUS registers */
1814 	for (i = 0; i < 6; ++i)
1815 		native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
1816 
1817 	value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
1818 	if (!err) {
1819 		u32 low, high;
1820 
1821 		value &= ~(1ULL << 2);
1822 		low    = lower_32_bits(value);
1823 		high   = upper_32_bits(value);
1824 
1825 		native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
1826 	}
1827 
1828 	/* Flush tlb to evict multi-match entries */
1829 	__flush_tlb_all();
1830 
1831 	return true;
1832 }
1833 
svm_handle_mce(struct vcpu_svm * svm)1834 static void svm_handle_mce(struct vcpu_svm *svm)
1835 {
1836 	if (is_erratum_383()) {
1837 		/*
1838 		 * Erratum 383 triggered. Guest state is corrupt so kill the
1839 		 * guest.
1840 		 */
1841 		pr_err("KVM: Guest triggered AMD Erratum 383\n");
1842 
1843 		kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
1844 
1845 		return;
1846 	}
1847 
1848 	/*
1849 	 * On an #MC intercept the MCE handler is not called automatically in
1850 	 * the host. So do it by hand here.
1851 	 */
1852 	asm volatile (
1853 		"int $0x12\n");
1854 	/* not sure if we ever come back to this point */
1855 
1856 	return;
1857 }
1858 
/* #MC exit handler: does no work of its own and always returns 1. */
static int mc_interception(struct vcpu_svm *svm)
{
	return 1;
}
1863 
shutdown_interception(struct vcpu_svm * svm)1864 static int shutdown_interception(struct vcpu_svm *svm)
1865 {
1866 	struct kvm_run *kvm_run = svm->vcpu.run;
1867 
1868 	/*
1869 	 * VMCB is undefined after a SHUTDOWN intercept
1870 	 * so reinitialize it.
1871 	 */
1872 	clear_page(svm->vmcb);
1873 	init_vmcb(svm);
1874 
1875 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1876 	return 0;
1877 }
1878 
io_interception(struct vcpu_svm * svm)1879 static int io_interception(struct vcpu_svm *svm)
1880 {
1881 	struct kvm_vcpu *vcpu = &svm->vcpu;
1882 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1883 	int size, in, string;
1884 	unsigned port;
1885 
1886 	++svm->vcpu.stat.io_exits;
1887 	string = (io_info & SVM_IOIO_STR_MASK) != 0;
1888 	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1889 	if (string || in)
1890 		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
1891 
1892 	port = io_info >> 16;
1893 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1894 	svm->next_rip = svm->vmcb->control.exit_info_2;
1895 	skip_emulated_instruction(&svm->vcpu);
1896 
1897 	return kvm_fast_pio_out(vcpu, size, port);
1898 }
1899 
/* NMI exit handler: does no work of its own and always returns 1. */
static int nmi_interception(struct vcpu_svm *svm)
{
	return 1;
}
1904 
intr_interception(struct vcpu_svm * svm)1905 static int intr_interception(struct vcpu_svm *svm)
1906 {
1907 	++svm->vcpu.stat.irq_exits;
1908 	return 1;
1909 }
1910 
/* Exit handler that does nothing and always returns 1. */
static int nop_on_interception(struct vcpu_svm *svm)
{
	return 1;
}
1915 
halt_interception(struct vcpu_svm * svm)1916 static int halt_interception(struct vcpu_svm *svm)
1917 {
1918 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
1919 	skip_emulated_instruction(&svm->vcpu);
1920 	return kvm_emulate_halt(&svm->vcpu);
1921 }
1922 
vmmcall_interception(struct vcpu_svm * svm)1923 static int vmmcall_interception(struct vcpu_svm *svm)
1924 {
1925 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1926 	skip_emulated_instruction(&svm->vcpu);
1927 	kvm_emulate_hypercall(&svm->vcpu);
1928 	return 1;
1929 }
1930 
nested_svm_get_tdp_cr3(struct kvm_vcpu * vcpu)1931 static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
1932 {
1933 	struct vcpu_svm *svm = to_svm(vcpu);
1934 
1935 	return svm->nested.nested_cr3;
1936 }
1937 
nested_svm_get_tdp_pdptr(struct kvm_vcpu * vcpu,int index)1938 static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
1939 {
1940 	struct vcpu_svm *svm = to_svm(vcpu);
1941 	u64 cr3 = svm->nested.nested_cr3;
1942 	u64 pdpte;
1943 	int ret;
1944 
1945 	ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte,
1946 				  offset_in_page(cr3) + index * 8, 8);
1947 	if (ret)
1948 		return 0;
1949 	return pdpte;
1950 }
1951 
nested_svm_set_tdp_cr3(struct kvm_vcpu * vcpu,unsigned long root)1952 static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
1953 				   unsigned long root)
1954 {
1955 	struct vcpu_svm *svm = to_svm(vcpu);
1956 
1957 	svm->vmcb->control.nested_cr3 = root;
1958 	mark_dirty(svm->vmcb, VMCB_NPT);
1959 	svm_flush_tlb(vcpu);
1960 }
1961 
nested_svm_inject_npf_exit(struct kvm_vcpu * vcpu,struct x86_exception * fault)1962 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1963 				       struct x86_exception *fault)
1964 {
1965 	struct vcpu_svm *svm = to_svm(vcpu);
1966 
1967 	svm->vmcb->control.exit_code = SVM_EXIT_NPF;
1968 	svm->vmcb->control.exit_code_hi = 0;
1969 	svm->vmcb->control.exit_info_1 = fault->error_code;
1970 	svm->vmcb->control.exit_info_2 = fault->address;
1971 
1972 	nested_svm_vmexit(svm);
1973 }
1974 
nested_svm_init_mmu_context(struct kvm_vcpu * vcpu)1975 static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
1976 {
1977 	int r;
1978 
1979 	r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
1980 
1981 	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
1982 	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
1983 	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
1984 	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
1985 	vcpu->arch.mmu.shadow_root_level = get_npt_level();
1986 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
1987 
1988 	return r;
1989 }
1990 
nested_svm_uninit_mmu_context(struct kvm_vcpu * vcpu)1991 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
1992 {
1993 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
1994 }
1995 
nested_svm_check_permissions(struct vcpu_svm * svm)1996 static int nested_svm_check_permissions(struct vcpu_svm *svm)
1997 {
1998 	if (!(svm->vcpu.arch.efer & EFER_SVME)
1999 	    || !is_paging(&svm->vcpu)) {
2000 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2001 		return 1;
2002 	}
2003 
2004 	if (svm->vmcb->save.cpl) {
2005 		kvm_inject_gp(&svm->vcpu, 0);
2006 		return 1;
2007 	}
2008 
2009        return 0;
2010 }
2011 
/*
 * Decide how exception vector @nr has to be handled with respect to nested
 * virtualisation.
 *
 * Outside guest mode this returns 0 immediately.  Otherwise the VMCB exit
 * fields are filled in as for an exception exit (exit_info_1 = @error_code,
 * exit_info_2 = the vcpu's cr2) and nested_svm_intercept() is consulted.
 * If it answers NESTED_EXIT_DONE, a nested #vmexit is flagged as required.
 * The result of nested_svm_intercept() is returned.
 *
 * @has_error_code is not used by this function.
 */
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
				      bool has_error_code, u32 error_code)
{
	struct vmcb_control_area *ctl = &svm->vmcb->control;
	int verdict;

	if (!is_guest_mode(&svm->vcpu))
		return 0;

	ctl->exit_code = SVM_EXIT_EXCP_BASE + nr;
	ctl->exit_code_hi = 0;
	ctl->exit_info_1 = error_code;
	ctl->exit_info_2 = svm->vcpu.arch.cr2;

	verdict = nested_svm_intercept(svm);
	if (verdict == NESTED_EXIT_DONE)
		svm->nested.exit_required = true;

	return verdict;
}
2031 
2032 /* This function returns true if it is save to enable the irq window */
nested_svm_intr(struct vcpu_svm * svm)2033 static inline bool nested_svm_intr(struct vcpu_svm *svm)
2034 {
2035 	if (!is_guest_mode(&svm->vcpu))
2036 		return true;
2037 
2038 	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2039 		return true;
2040 
2041 	if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
2042 		return false;
2043 
2044 	/*
2045 	 * if vmexit was already requested (by intercepted exception
2046 	 * for instance) do not overwrite it with "external interrupt"
2047 	 * vmexit.
2048 	 */
2049 	if (svm->nested.exit_required)
2050 		return false;
2051 
2052 	svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
2053 	svm->vmcb->control.exit_info_1 = 0;
2054 	svm->vmcb->control.exit_info_2 = 0;
2055 
2056 	if (svm->nested.intercept & 1ULL) {
2057 		/*
2058 		 * The #vmexit can't be emulated here directly because this
2059 		 * code path runs with irqs and preemtion disabled. A
2060 		 * #vmexit emulation might sleep. Only signal request for
2061 		 * the #vmexit here.
2062 		 */
2063 		svm->nested.exit_required = true;
2064 		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
2065 		return false;
2066 	}
2067 
2068 	return true;
2069 }
2070 
2071 /* This function returns true if it is save to enable the nmi window */
nested_svm_nmi(struct vcpu_svm * svm)2072 static inline bool nested_svm_nmi(struct vcpu_svm *svm)
2073 {
2074 	if (!is_guest_mode(&svm->vcpu))
2075 		return true;
2076 
2077 	if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
2078 		return true;
2079 
2080 	svm->vmcb->control.exit_code = SVM_EXIT_NMI;
2081 	svm->nested.exit_required = true;
2082 
2083 	return false;
2084 }
2085 
/*
 * Map the guest page containing guest physical address @gpa.
 *
 * On success the page is stored in *@_page and the address returned by
 * kmap() for the page is handed back; the caller undoes this with
 * nested_svm_unmap().  If the page lookup yields an error page, the page is
 * released, #GP(0) is injected into the guest and NULL is returned.
 *
 * May sleep.
 */
static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
{
	struct page *guest_page;

	might_sleep();

	guest_page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
	if (is_error_page(guest_page)) {
		kvm_release_page_clean(guest_page);
		kvm_inject_gp(&svm->vcpu, 0);

		return NULL;
	}

	*_page = guest_page;

	return kmap(guest_page);
}
2106 
/*
 * Undo nested_svm_map(): drop the kernel mapping of @page and release the
 * page, marking it dirty.
 */
static void nested_svm_unmap(struct page *page)
{
	kunmap(page);
	kvm_release_page_dirty(page);
}
2112 
nested_svm_intercept_ioio(struct vcpu_svm * svm)2113 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
2114 {
2115 	unsigned port;
2116 	u8 val, bit;
2117 	u64 gpa;
2118 
2119 	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
2120 		return NESTED_EXIT_HOST;
2121 
2122 	port = svm->vmcb->control.exit_info_1 >> 16;
2123 	gpa  = svm->nested.vmcb_iopm + (port / 8);
2124 	bit  = port % 8;
2125 	val  = 0;
2126 
2127 	if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
2128 		val &= (1 << bit);
2129 
2130 	return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
2131 }
2132 
nested_svm_exit_handled_msr(struct vcpu_svm * svm)2133 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
2134 {
2135 	u32 offset, msr, value;
2136 	int write, mask;
2137 
2138 	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2139 		return NESTED_EXIT_HOST;
2140 
2141 	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2142 	offset = svm_msrpm_offset(msr);
2143 	write  = svm->vmcb->control.exit_info_1 & 1;
2144 	mask   = 1 << ((2 * (msr & 0xf)) + write);
2145 
2146 	if (offset == MSR_INVALID)
2147 		return NESTED_EXIT_DONE;
2148 
2149 	/* Offset is in 32 bit units but need in 8 bit units */
2150 	offset *= 4;
2151 
2152 	if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
2153 		return NESTED_EXIT_DONE;
2154 
2155 	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
2156 }
2157 
nested_svm_exit_special(struct vcpu_svm * svm)2158 static int nested_svm_exit_special(struct vcpu_svm *svm)
2159 {
2160 	u32 exit_code = svm->vmcb->control.exit_code;
2161 
2162 	switch (exit_code) {
2163 	case SVM_EXIT_INTR:
2164 	case SVM_EXIT_NMI:
2165 	case SVM_EXIT_EXCP_BASE + MC_VECTOR:
2166 		return NESTED_EXIT_HOST;
2167 	case SVM_EXIT_NPF:
2168 		/* For now we are always handling NPFs when using them */
2169 		if (npt_enabled)
2170 			return NESTED_EXIT_HOST;
2171 		break;
2172 	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
2173 		/* When we're shadowing, trap PFs, but not async PF */
2174 		if (!npt_enabled && svm->apf_reason == 0)
2175 			return NESTED_EXIT_HOST;
2176 		break;
2177 	case SVM_EXIT_EXCP_BASE + NM_VECTOR:
2178 		nm_interception(svm);
2179 		break;
2180 	default:
2181 		break;
2182 	}
2183 
2184 	return NESTED_EXIT_CONTINUE;
2185 }
2186 
2187 /*
2188  * If this function returns true, this #vmexit was already handled
2189  */
nested_svm_intercept(struct vcpu_svm * svm)2190 static int nested_svm_intercept(struct vcpu_svm *svm)
2191 {
2192 	u32 exit_code = svm->vmcb->control.exit_code;
2193 	int vmexit = NESTED_EXIT_HOST;
2194 
2195 	switch (exit_code) {
2196 	case SVM_EXIT_MSR:
2197 		vmexit = nested_svm_exit_handled_msr(svm);
2198 		break;
2199 	case SVM_EXIT_IOIO:
2200 		vmexit = nested_svm_intercept_ioio(svm);
2201 		break;
2202 	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
2203 		u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
2204 		if (svm->nested.intercept_cr & bit)
2205 			vmexit = NESTED_EXIT_DONE;
2206 		break;
2207 	}
2208 	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
2209 		u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
2210 		if (svm->nested.intercept_dr & bit)
2211 			vmexit = NESTED_EXIT_DONE;
2212 		break;
2213 	}
2214 	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
2215 		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
2216 		if (svm->nested.intercept_exceptions & excp_bits)
2217 			vmexit = NESTED_EXIT_DONE;
2218 		/* async page fault always cause vmexit */
2219 		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
2220 			 svm->apf_reason != 0)
2221 			vmexit = NESTED_EXIT_DONE;
2222 		break;
2223 	}
2224 	case SVM_EXIT_ERR: {
2225 		vmexit = NESTED_EXIT_DONE;
2226 		break;
2227 	}
2228 	default: {
2229 		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
2230 		if (svm->nested.intercept & exit_bits)
2231 			vmexit = NESTED_EXIT_DONE;
2232 	}
2233 	}
2234 
2235 	return vmexit;
2236 }
2237 
nested_svm_exit_handled(struct vcpu_svm * svm)2238 static int nested_svm_exit_handled(struct vcpu_svm *svm)
2239 {
2240 	int vmexit;
2241 
2242 	vmexit = nested_svm_intercept(svm);
2243 
2244 	if (vmexit == NESTED_EXIT_DONE)
2245 		nested_svm_vmexit(svm);
2246 
2247 	return vmexit;
2248 }
2249 
copy_vmcb_control_area(struct vmcb * dst_vmcb,struct vmcb * from_vmcb)2250 static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
2251 {
2252 	struct vmcb_control_area *dst  = &dst_vmcb->control;
2253 	struct vmcb_control_area *from = &from_vmcb->control;
2254 
2255 	dst->intercept_cr         = from->intercept_cr;
2256 	dst->intercept_dr         = from->intercept_dr;
2257 	dst->intercept_exceptions = from->intercept_exceptions;
2258 	dst->intercept            = from->intercept;
2259 	dst->iopm_base_pa         = from->iopm_base_pa;
2260 	dst->msrpm_base_pa        = from->msrpm_base_pa;
2261 	dst->tsc_offset           = from->tsc_offset;
2262 	dst->asid                 = from->asid;
2263 	dst->tlb_ctl              = from->tlb_ctl;
2264 	dst->int_ctl              = from->int_ctl;
2265 	dst->int_vector           = from->int_vector;
2266 	dst->int_state            = from->int_state;
2267 	dst->exit_code            = from->exit_code;
2268 	dst->exit_code_hi         = from->exit_code_hi;
2269 	dst->exit_info_1          = from->exit_info_1;
2270 	dst->exit_info_2          = from->exit_info_2;
2271 	dst->exit_int_info        = from->exit_int_info;
2272 	dst->exit_int_info_err    = from->exit_int_info_err;
2273 	dst->nested_ctl           = from->nested_ctl;
2274 	dst->event_inj            = from->event_inj;
2275 	dst->event_inj_err        = from->event_inj_err;
2276 	dst->nested_cr3           = from->nested_cr3;
2277 	dst->lbr_ctl              = from->lbr_ctl;
2278 }
2279 
nested_svm_vmexit(struct vcpu_svm * svm)2280 static int nested_svm_vmexit(struct vcpu_svm *svm)
2281 {
2282 	struct vmcb *nested_vmcb;
2283 	struct vmcb *hsave = svm->nested.hsave;
2284 	struct vmcb *vmcb = svm->vmcb;
2285 	struct page *page;
2286 
2287 	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
2288 				       vmcb->control.exit_info_1,
2289 				       vmcb->control.exit_info_2,
2290 				       vmcb->control.exit_int_info,
2291 				       vmcb->control.exit_int_info_err,
2292 				       KVM_ISA_SVM);
2293 
2294 	nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
2295 	if (!nested_vmcb)
2296 		return 1;
2297 
2298 	/* Exit Guest-Mode */
2299 	leave_guest_mode(&svm->vcpu);
2300 	svm->nested.vmcb = 0;
2301 
2302 	/* Give the current vmcb to the guest */
2303 	disable_gif(svm);
2304 
2305 	nested_vmcb->save.es     = vmcb->save.es;
2306 	nested_vmcb->save.cs     = vmcb->save.cs;
2307 	nested_vmcb->save.ss     = vmcb->save.ss;
2308 	nested_vmcb->save.ds     = vmcb->save.ds;
2309 	nested_vmcb->save.gdtr   = vmcb->save.gdtr;
2310 	nested_vmcb->save.idtr   = vmcb->save.idtr;
2311 	nested_vmcb->save.efer   = svm->vcpu.arch.efer;
2312 	nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
2313 	nested_vmcb->save.cr3    = kvm_read_cr3(&svm->vcpu);
2314 	nested_vmcb->save.cr2    = vmcb->save.cr2;
2315 	nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
2316 	nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
2317 	nested_vmcb->save.rip    = vmcb->save.rip;
2318 	nested_vmcb->save.rsp    = vmcb->save.rsp;
2319 	nested_vmcb->save.rax    = vmcb->save.rax;
2320 	nested_vmcb->save.dr7    = vmcb->save.dr7;
2321 	nested_vmcb->save.dr6    = vmcb->save.dr6;
2322 	nested_vmcb->save.cpl    = vmcb->save.cpl;
2323 
2324 	nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
2325 	nested_vmcb->control.int_vector        = vmcb->control.int_vector;
2326 	nested_vmcb->control.int_state         = vmcb->control.int_state;
2327 	nested_vmcb->control.exit_code         = vmcb->control.exit_code;
2328 	nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
2329 	nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
2330 	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
2331 	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
2332 	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
2333 	nested_vmcb->control.next_rip          = vmcb->control.next_rip;
2334 
2335 	/*
2336 	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
2337 	 * to make sure that we do not lose injected events. So check event_inj
2338 	 * here and copy it to exit_int_info if it is valid.
2339 	 * Exit_int_info and event_inj can't be both valid because the case
2340 	 * below only happens on a VMRUN instruction intercept which has
2341 	 * no valid exit_int_info set.
2342 	 */
2343 	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
2344 		struct vmcb_control_area *nc = &nested_vmcb->control;
2345 
2346 		nc->exit_int_info     = vmcb->control.event_inj;
2347 		nc->exit_int_info_err = vmcb->control.event_inj_err;
2348 	}
2349 
2350 	nested_vmcb->control.tlb_ctl           = 0;
2351 	nested_vmcb->control.event_inj         = 0;
2352 	nested_vmcb->control.event_inj_err     = 0;
2353 
2354 	/* We always set V_INTR_MASKING and remember the old value in hflags */
2355 	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2356 		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
2357 
2358 	/* Restore the original control entries */
2359 	copy_vmcb_control_area(vmcb, hsave);
2360 
2361 	kvm_clear_exception_queue(&svm->vcpu);
2362 	kvm_clear_interrupt_queue(&svm->vcpu);
2363 
2364 	svm->nested.nested_cr3 = 0;
2365 
2366 	/* Restore selected save entries */
2367 	svm->vmcb->save.es = hsave->save.es;
2368 	svm->vmcb->save.cs = hsave->save.cs;
2369 	svm->vmcb->save.ss = hsave->save.ss;
2370 	svm->vmcb->save.ds = hsave->save.ds;
2371 	svm->vmcb->save.gdtr = hsave->save.gdtr;
2372 	svm->vmcb->save.idtr = hsave->save.idtr;
2373 	kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
2374 	svm_set_efer(&svm->vcpu, hsave->save.efer);
2375 	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
2376 	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
2377 	if (npt_enabled) {
2378 		svm->vmcb->save.cr3 = hsave->save.cr3;
2379 		svm->vcpu.arch.cr3 = hsave->save.cr3;
2380 	} else {
2381 		(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
2382 	}
2383 	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
2384 	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
2385 	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
2386 	svm->vmcb->save.dr7 = 0;
2387 	svm->vmcb->save.cpl = 0;
2388 	svm->vmcb->control.exit_int_info = 0;
2389 
2390 	mark_all_dirty(svm->vmcb);
2391 
2392 	nested_svm_unmap(page);
2393 
2394 	nested_svm_uninit_mmu_context(&svm->vcpu);
2395 	kvm_mmu_reset_context(&svm->vcpu);
2396 	kvm_mmu_load(&svm->vcpu);
2397 
2398 	return 0;
2399 }
2400 
nested_svm_vmrun_msrpm(struct vcpu_svm * svm)2401 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
2402 {
2403 	/*
2404 	 * This function merges the msr permission bitmaps of kvm and the
2405 	 * nested vmcb. It is omptimized in that it only merges the parts where
2406 	 * the kvm msr permission bitmap may contain zero bits
2407 	 */
2408 	int i;
2409 
2410 	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2411 		return true;
2412 
2413 	for (i = 0; i < MSRPM_OFFSETS; i++) {
2414 		u32 value, p;
2415 		u64 offset;
2416 
2417 		if (msrpm_offsets[i] == 0xffffffff)
2418 			break;
2419 
2420 		p      = msrpm_offsets[i];
2421 		offset = svm->nested.vmcb_msrpm + (p * 4);
2422 
2423 		if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
2424 			return false;
2425 
2426 		svm->nested.msrpm[p] = svm->msrpm[p] | value;
2427 	}
2428 
2429 	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
2430 
2431 	return true;
2432 }
2433 
nested_vmcb_checks(struct vmcb * vmcb)2434 static bool nested_vmcb_checks(struct vmcb *vmcb)
2435 {
2436 	if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
2437 		return false;
2438 
2439 	if (vmcb->control.asid == 0)
2440 		return false;
2441 
2442 	if (vmcb->control.nested_ctl && !npt_enabled)
2443 		return false;
2444 
2445 	return true;
2446 }
2447 
nested_svm_vmrun(struct vcpu_svm * svm)2448 static bool nested_svm_vmrun(struct vcpu_svm *svm)
2449 {
2450 	struct vmcb *nested_vmcb;
2451 	struct vmcb *hsave = svm->nested.hsave;
2452 	struct vmcb *vmcb = svm->vmcb;
2453 	struct page *page;
2454 	u64 vmcb_gpa;
2455 
2456 	vmcb_gpa = svm->vmcb->save.rax;
2457 
2458 	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2459 	if (!nested_vmcb)
2460 		return false;
2461 
2462 	if (!nested_vmcb_checks(nested_vmcb)) {
2463 		nested_vmcb->control.exit_code    = SVM_EXIT_ERR;
2464 		nested_vmcb->control.exit_code_hi = 0;
2465 		nested_vmcb->control.exit_info_1  = 0;
2466 		nested_vmcb->control.exit_info_2  = 0;
2467 
2468 		nested_svm_unmap(page);
2469 
2470 		return false;
2471 	}
2472 
2473 	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
2474 			       nested_vmcb->save.rip,
2475 			       nested_vmcb->control.int_ctl,
2476 			       nested_vmcb->control.event_inj,
2477 			       nested_vmcb->control.nested_ctl);
2478 
2479 	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
2480 				    nested_vmcb->control.intercept_cr >> 16,
2481 				    nested_vmcb->control.intercept_exceptions,
2482 				    nested_vmcb->control.intercept);
2483 
2484 	/* Clear internal status */
2485 	kvm_clear_exception_queue(&svm->vcpu);
2486 	kvm_clear_interrupt_queue(&svm->vcpu);
2487 
2488 	/*
2489 	 * Save the old vmcb, so we don't need to pick what we save, but can
2490 	 * restore everything when a VMEXIT occurs
2491 	 */
2492 	hsave->save.es     = vmcb->save.es;
2493 	hsave->save.cs     = vmcb->save.cs;
2494 	hsave->save.ss     = vmcb->save.ss;
2495 	hsave->save.ds     = vmcb->save.ds;
2496 	hsave->save.gdtr   = vmcb->save.gdtr;
2497 	hsave->save.idtr   = vmcb->save.idtr;
2498 	hsave->save.efer   = svm->vcpu.arch.efer;
2499 	hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
2500 	hsave->save.cr4    = svm->vcpu.arch.cr4;
2501 	hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
2502 	hsave->save.rip    = kvm_rip_read(&svm->vcpu);
2503 	hsave->save.rsp    = vmcb->save.rsp;
2504 	hsave->save.rax    = vmcb->save.rax;
2505 	if (npt_enabled)
2506 		hsave->save.cr3    = vmcb->save.cr3;
2507 	else
2508 		hsave->save.cr3    = kvm_read_cr3(&svm->vcpu);
2509 
2510 	copy_vmcb_control_area(hsave, vmcb);
2511 
2512 	if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
2513 		svm->vcpu.arch.hflags |= HF_HIF_MASK;
2514 	else
2515 		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
2516 
2517 	if (nested_vmcb->control.nested_ctl) {
2518 		kvm_mmu_unload(&svm->vcpu);
2519 		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
2520 		nested_svm_init_mmu_context(&svm->vcpu);
2521 	}
2522 
2523 	/* Load the nested guest state */
2524 	svm->vmcb->save.es = nested_vmcb->save.es;
2525 	svm->vmcb->save.cs = nested_vmcb->save.cs;
2526 	svm->vmcb->save.ss = nested_vmcb->save.ss;
2527 	svm->vmcb->save.ds = nested_vmcb->save.ds;
2528 	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
2529 	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
2530 	kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
2531 	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
2532 	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
2533 	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
2534 	if (npt_enabled) {
2535 		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
2536 		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
2537 	} else
2538 		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
2539 
2540 	/* Guest paging mode is active - reset mmu */
2541 	kvm_mmu_reset_context(&svm->vcpu);
2542 
2543 	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
2544 	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
2545 	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
2546 	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
2547 
2548 	/* In case we don't even reach vcpu_run, the fields are not updated */
2549 	svm->vmcb->save.rax = nested_vmcb->save.rax;
2550 	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
2551 	svm->vmcb->save.rip = nested_vmcb->save.rip;
2552 	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
2553 	svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
2554 	svm->vmcb->save.cpl = nested_vmcb->save.cpl;
2555 
2556 	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
2557 	svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;
2558 
2559 	/* cache intercepts */
2560 	svm->nested.intercept_cr         = nested_vmcb->control.intercept_cr;
2561 	svm->nested.intercept_dr         = nested_vmcb->control.intercept_dr;
2562 	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
2563 	svm->nested.intercept            = nested_vmcb->control.intercept;
2564 
2565 	svm_flush_tlb(&svm->vcpu);
2566 	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
2567 	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
2568 		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
2569 	else
2570 		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
2571 
2572 	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
2573 		/* We only want the cr8 intercept bits of the guest */
2574 		clr_cr_intercept(svm, INTERCEPT_CR8_READ);
2575 		clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
2576 	}
2577 
2578 	/* We don't want to see VMMCALLs from a nested guest */
2579 	clr_intercept(svm, INTERCEPT_VMMCALL);
2580 
2581 	svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
2582 	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
2583 	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
2584 	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
2585 	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
2586 	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
2587 
2588 	nested_svm_unmap(page);
2589 
2590 	/* Enter Guest-Mode */
2591 	enter_guest_mode(&svm->vcpu);
2592 
2593 	/*
2594 	 * Merge guest and host intercepts - must be called  with vcpu in
2595 	 * guest-mode to take affect here
2596 	 */
2597 	recalc_intercepts(svm);
2598 
2599 	svm->nested.vmcb = vmcb_gpa;
2600 
2601 	enable_gif(svm);
2602 
2603 	mark_all_dirty(svm->vmcb);
2604 
2605 	return true;
2606 }
2607 
nested_svm_vmloadsave(struct vmcb * from_vmcb,struct vmcb * to_vmcb)2608 static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
2609 {
2610 	to_vmcb->save.fs = from_vmcb->save.fs;
2611 	to_vmcb->save.gs = from_vmcb->save.gs;
2612 	to_vmcb->save.tr = from_vmcb->save.tr;
2613 	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
2614 	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
2615 	to_vmcb->save.star = from_vmcb->save.star;
2616 	to_vmcb->save.lstar = from_vmcb->save.lstar;
2617 	to_vmcb->save.cstar = from_vmcb->save.cstar;
2618 	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
2619 	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
2620 	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
2621 	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
2622 }
2623 
vmload_interception(struct vcpu_svm * svm)2624 static int vmload_interception(struct vcpu_svm *svm)
2625 {
2626 	struct vmcb *nested_vmcb;
2627 	struct page *page;
2628 
2629 	if (nested_svm_check_permissions(svm))
2630 		return 1;
2631 
2632 	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2633 	if (!nested_vmcb)
2634 		return 1;
2635 
2636 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2637 	skip_emulated_instruction(&svm->vcpu);
2638 
2639 	nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
2640 	nested_svm_unmap(page);
2641 
2642 	return 1;
2643 }
2644 
vmsave_interception(struct vcpu_svm * svm)2645 static int vmsave_interception(struct vcpu_svm *svm)
2646 {
2647 	struct vmcb *nested_vmcb;
2648 	struct page *page;
2649 
2650 	if (nested_svm_check_permissions(svm))
2651 		return 1;
2652 
2653 	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2654 	if (!nested_vmcb)
2655 		return 1;
2656 
2657 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2658 	skip_emulated_instruction(&svm->vcpu);
2659 
2660 	nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
2661 	nested_svm_unmap(page);
2662 
2663 	return 1;
2664 }
2665 
vmrun_interception(struct vcpu_svm * svm)2666 static int vmrun_interception(struct vcpu_svm *svm)
2667 {
2668 	if (nested_svm_check_permissions(svm))
2669 		return 1;
2670 
2671 	/* Save rip after vmrun instruction */
2672 	kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
2673 
2674 	if (!nested_svm_vmrun(svm))
2675 		return 1;
2676 
2677 	if (!nested_svm_vmrun_msrpm(svm))
2678 		goto failed;
2679 
2680 	return 1;
2681 
2682 failed:
2683 
2684 	svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
2685 	svm->vmcb->control.exit_code_hi = 0;
2686 	svm->vmcb->control.exit_info_1  = 0;
2687 	svm->vmcb->control.exit_info_2  = 0;
2688 
2689 	nested_svm_vmexit(svm);
2690 
2691 	return 1;
2692 }
2693 
stgi_interception(struct vcpu_svm * svm)2694 static int stgi_interception(struct vcpu_svm *svm)
2695 {
2696 	if (nested_svm_check_permissions(svm))
2697 		return 1;
2698 
2699 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2700 	skip_emulated_instruction(&svm->vcpu);
2701 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2702 
2703 	enable_gif(svm);
2704 
2705 	return 1;
2706 }
2707 
clgi_interception(struct vcpu_svm * svm)2708 static int clgi_interception(struct vcpu_svm *svm)
2709 {
2710 	if (nested_svm_check_permissions(svm))
2711 		return 1;
2712 
2713 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2714 	skip_emulated_instruction(&svm->vcpu);
2715 
2716 	disable_gif(svm);
2717 
2718 	/* After a CLGI no interrupts should come */
2719 	svm_clear_vintr(svm);
2720 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2721 
2722 	mark_dirty(svm->vmcb, VMCB_INTR);
2723 
2724 	return 1;
2725 }
2726 
invlpga_interception(struct vcpu_svm * svm)2727 static int invlpga_interception(struct vcpu_svm *svm)
2728 {
2729 	struct kvm_vcpu *vcpu = &svm->vcpu;
2730 
2731 	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
2732 			  vcpu->arch.regs[VCPU_REGS_RAX]);
2733 
2734 	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
2735 	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
2736 
2737 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2738 	skip_emulated_instruction(&svm->vcpu);
2739 	return 1;
2740 }
2741 
skinit_interception(struct vcpu_svm * svm)2742 static int skinit_interception(struct vcpu_svm *svm)
2743 {
2744 	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
2745 
2746 	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2747 	return 1;
2748 }
2749 
xsetbv_interception(struct vcpu_svm * svm)2750 static int xsetbv_interception(struct vcpu_svm *svm)
2751 {
2752 	u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
2753 	u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
2754 
2755 	if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
2756 		svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2757 		skip_emulated_instruction(&svm->vcpu);
2758 	}
2759 
2760 	return 1;
2761 }
2762 
invalid_op_interception(struct vcpu_svm * svm)2763 static int invalid_op_interception(struct vcpu_svm *svm)
2764 {
2765 	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2766 	return 1;
2767 }
2768 
task_switch_interception(struct vcpu_svm * svm)2769 static int task_switch_interception(struct vcpu_svm *svm)
2770 {
2771 	u16 tss_selector;
2772 	int reason;
2773 	int int_type = svm->vmcb->control.exit_int_info &
2774 		SVM_EXITINTINFO_TYPE_MASK;
2775 	int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
2776 	uint32_t type =
2777 		svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2778 	uint32_t idt_v =
2779 		svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2780 	bool has_error_code = false;
2781 	u32 error_code = 0;
2782 
2783 	tss_selector = (u16)svm->vmcb->control.exit_info_1;
2784 
2785 	if (svm->vmcb->control.exit_info_2 &
2786 	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
2787 		reason = TASK_SWITCH_IRET;
2788 	else if (svm->vmcb->control.exit_info_2 &
2789 		 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2790 		reason = TASK_SWITCH_JMP;
2791 	else if (idt_v)
2792 		reason = TASK_SWITCH_GATE;
2793 	else
2794 		reason = TASK_SWITCH_CALL;
2795 
2796 	if (reason == TASK_SWITCH_GATE) {
2797 		switch (type) {
2798 		case SVM_EXITINTINFO_TYPE_NMI:
2799 			svm->vcpu.arch.nmi_injected = false;
2800 			break;
2801 		case SVM_EXITINTINFO_TYPE_EXEPT:
2802 			if (svm->vmcb->control.exit_info_2 &
2803 			    (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2804 				has_error_code = true;
2805 				error_code =
2806 					(u32)svm->vmcb->control.exit_info_2;
2807 			}
2808 			kvm_clear_exception_queue(&svm->vcpu);
2809 			break;
2810 		case SVM_EXITINTINFO_TYPE_INTR:
2811 			kvm_clear_interrupt_queue(&svm->vcpu);
2812 			break;
2813 		default:
2814 			break;
2815 		}
2816 	}
2817 
2818 	if (reason != TASK_SWITCH_GATE ||
2819 	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2820 	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
2821 	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2822 		skip_emulated_instruction(&svm->vcpu);
2823 
2824 	if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
2825 		int_vec = -1;
2826 
2827 	if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
2828 				has_error_code, error_code) == EMULATE_FAIL) {
2829 		svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2830 		svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
2831 		svm->vcpu.run->internal.ndata = 0;
2832 		return 0;
2833 	}
2834 	return 1;
2835 }
2836 
cpuid_interception(struct vcpu_svm * svm)2837 static int cpuid_interception(struct vcpu_svm *svm)
2838 {
2839 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2840 	kvm_emulate_cpuid(&svm->vcpu);
2841 	return 1;
2842 }
2843 
iret_interception(struct vcpu_svm * svm)2844 static int iret_interception(struct vcpu_svm *svm)
2845 {
2846 	++svm->vcpu.stat.nmi_window_exits;
2847 	clr_intercept(svm, INTERCEPT_IRET);
2848 	svm->vcpu.arch.hflags |= HF_IRET_MASK;
2849 	svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
2850 	return 1;
2851 }
2852 
invlpg_interception(struct vcpu_svm * svm)2853 static int invlpg_interception(struct vcpu_svm *svm)
2854 {
2855 	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2856 		return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2857 
2858 	kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
2859 	skip_emulated_instruction(&svm->vcpu);
2860 	return 1;
2861 }
2862 
emulate_on_interception(struct vcpu_svm * svm)2863 static int emulate_on_interception(struct vcpu_svm *svm)
2864 {
2865 	return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2866 }
2867 
rdpmc_interception(struct vcpu_svm * svm)2868 static int rdpmc_interception(struct vcpu_svm *svm)
2869 {
2870 	int err;
2871 
2872 	if (!static_cpu_has(X86_FEATURE_NRIPS))
2873 		return emulate_on_interception(svm);
2874 
2875 	err = kvm_rdpmc(&svm->vcpu);
2876 	kvm_complete_insn_gp(&svm->vcpu, err);
2877 
2878 	return 1;
2879 }
2880 
check_selective_cr0_intercepted(struct vcpu_svm * svm,unsigned long val)2881 bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
2882 {
2883 	unsigned long cr0 = svm->vcpu.arch.cr0;
2884 	bool ret = false;
2885 	u64 intercept;
2886 
2887 	intercept = svm->nested.intercept;
2888 
2889 	if (!is_guest_mode(&svm->vcpu) ||
2890 	    (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
2891 		return false;
2892 
2893 	cr0 &= ~SVM_CR0_SELECTIVE_MASK;
2894 	val &= ~SVM_CR0_SELECTIVE_MASK;
2895 
2896 	if (cr0 ^ val) {
2897 		svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
2898 		ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
2899 	}
2900 
2901 	return ret;
2902 }
2903 
2904 #define CR_VALID (1ULL << 63)
2905 
cr_interception(struct vcpu_svm * svm)2906 static int cr_interception(struct vcpu_svm *svm)
2907 {
2908 	int reg, cr;
2909 	unsigned long val;
2910 	int err;
2911 
2912 	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2913 		return emulate_on_interception(svm);
2914 
2915 	if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
2916 		return emulate_on_interception(svm);
2917 
2918 	reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2919 	cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
2920 
2921 	err = 0;
2922 	if (cr >= 16) { /* mov to cr */
2923 		cr -= 16;
2924 		val = kvm_register_read(&svm->vcpu, reg);
2925 		switch (cr) {
2926 		case 0:
2927 			if (!check_selective_cr0_intercepted(svm, val))
2928 				err = kvm_set_cr0(&svm->vcpu, val);
2929 			else
2930 				return 1;
2931 
2932 			break;
2933 		case 3:
2934 			err = kvm_set_cr3(&svm->vcpu, val);
2935 			break;
2936 		case 4:
2937 			err = kvm_set_cr4(&svm->vcpu, val);
2938 			break;
2939 		case 8:
2940 			err = kvm_set_cr8(&svm->vcpu, val);
2941 			break;
2942 		default:
2943 			WARN(1, "unhandled write to CR%d", cr);
2944 			kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2945 			return 1;
2946 		}
2947 	} else { /* mov from cr */
2948 		switch (cr) {
2949 		case 0:
2950 			val = kvm_read_cr0(&svm->vcpu);
2951 			break;
2952 		case 2:
2953 			val = svm->vcpu.arch.cr2;
2954 			break;
2955 		case 3:
2956 			val = kvm_read_cr3(&svm->vcpu);
2957 			break;
2958 		case 4:
2959 			val = kvm_read_cr4(&svm->vcpu);
2960 			break;
2961 		case 8:
2962 			val = kvm_get_cr8(&svm->vcpu);
2963 			break;
2964 		default:
2965 			WARN(1, "unhandled read from CR%d", cr);
2966 			kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2967 			return 1;
2968 		}
2969 		kvm_register_write(&svm->vcpu, reg, val);
2970 	}
2971 	kvm_complete_insn_gp(&svm->vcpu, err);
2972 
2973 	return 1;
2974 }
2975 
dr_interception(struct vcpu_svm * svm)2976 static int dr_interception(struct vcpu_svm *svm)
2977 {
2978 	int reg, dr;
2979 	unsigned long val;
2980 	int err;
2981 
2982 	if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
2983 		return emulate_on_interception(svm);
2984 
2985 	reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2986 	dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
2987 
2988 	if (dr >= 16) { /* mov to DRn */
2989 		val = kvm_register_read(&svm->vcpu, reg);
2990 		kvm_set_dr(&svm->vcpu, dr - 16, val);
2991 	} else {
2992 		err = kvm_get_dr(&svm->vcpu, dr, &val);
2993 		if (!err)
2994 			kvm_register_write(&svm->vcpu, reg, val);
2995 	}
2996 
2997 	skip_emulated_instruction(&svm->vcpu);
2998 
2999 	return 1;
3000 }
3001 
cr8_write_interception(struct vcpu_svm * svm)3002 static int cr8_write_interception(struct vcpu_svm *svm)
3003 {
3004 	struct kvm_run *kvm_run = svm->vcpu.run;
3005 	int r;
3006 
3007 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
3008 	/* instruction emulation calls kvm_set_cr8() */
3009 	r = cr_interception(svm);
3010 	if (irqchip_in_kernel(svm->vcpu.kvm))
3011 		return r;
3012 	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
3013 		return r;
3014 	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
3015 	return 0;
3016 }
3017 
svm_read_l1_tsc(struct kvm_vcpu * vcpu)3018 u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
3019 {
3020 	struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
3021 	return vmcb->control.tsc_offset +
3022 		svm_scale_tsc(vcpu, native_read_tsc());
3023 }
3024 
/*
 * Read the MSR @ecx on behalf of the guest and store its value in *@data.
 *
 * MSRs backed by the VMCB or by vcpu_svm state are served here; everything
 * else is delegated to kvm_get_msr_common().
 *
 * Returns 0 for MSRs handled here, otherwise the result of
 * kvm_get_msr_common().
 */
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;

	switch (ecx) {
	case MSR_IA32_TSC: {
		/* Scaled host TSC plus the offset of the active VMCB */
		*data = vmcb->control.tsc_offset +
			svm_scale_tsc(vcpu, native_read_tsc());

		break;
	}
	case MSR_STAR:
		*data = vmcb->save.star;
		break;
#ifdef CONFIG_X86_64
	case MSR_LSTAR:
		*data = vmcb->save.lstar;
		break;
	case MSR_CSTAR:
		*data = vmcb->save.cstar;
		break;
	case MSR_KERNEL_GS_BASE:
		*data = vmcb->save.kernel_gs_base;
		break;
	case MSR_SYSCALL_MASK:
		*data = vmcb->save.sfmask;
		break;
#endif
	case MSR_IA32_SYSENTER_CS:
		*data = vmcb->save.sysenter_cs;
		break;
	/* SYSENTER EIP/ESP are served from vcpu_svm, not from the VMCB */
	case MSR_IA32_SYSENTER_EIP:
		*data = svm->sysenter_eip;
		break;
	case MSR_IA32_SYSENTER_ESP:
		*data = svm->sysenter_esp;
		break;
	/*
	 * Nobody will change the following 5 values in the VMCB so we can
	 * safely return them on rdmsr. They will always be 0 until LBRV is
	 * implemented.
	 */
	case MSR_IA32_DEBUGCTLMSR:
		*data = vmcb->save.dbgctl;
		break;
	case MSR_IA32_LASTBRANCHFROMIP:
		*data = vmcb->save.br_from;
		break;
	case MSR_IA32_LASTBRANCHTOIP:
		*data = vmcb->save.br_to;
		break;
	case MSR_IA32_LASTINTFROMIP:
		*data = vmcb->save.last_excp_from;
		break;
	case MSR_IA32_LASTINTTOIP:
		*data = vmcb->save.last_excp_to;
		break;
	case MSR_VM_HSAVE_PA:
		*data = svm->nested.hsave_msr;
		break;
	case MSR_VM_CR:
		*data = svm->nested.vm_cr_msr;
		break;
	case MSR_IA32_UCODE_REV:
		/* Fixed microcode revision reported to the guest */
		*data = 0x01000065;
		break;
	default:
		return kvm_get_msr_common(vcpu, ecx, data);
	}

	return 0;
}
3096 
rdmsr_interception(struct vcpu_svm * svm)3097 static int rdmsr_interception(struct vcpu_svm *svm)
3098 {
3099 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3100 	u64 data;
3101 
3102 	if (svm_get_msr(&svm->vcpu, ecx, &data)) {
3103 		trace_kvm_msr_read_ex(ecx);
3104 		kvm_inject_gp(&svm->vcpu, 0);
3105 	} else {
3106 		trace_kvm_msr_read(ecx, data);
3107 
3108 		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
3109 		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
3110 		svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3111 		skip_emulated_instruction(&svm->vcpu);
3112 	}
3113 	return 1;
3114 }
3115 
/*
 * Emulate a guest write of @data to MSR_VM_CR.
 *
 * Bits outside SVM_VM_CR_VALID_MASK are rejected.  Once the SVM_DIS bit
 * is set in the stored value, the LOCK and SVM_DIS bits can no longer
 * be changed.
 *
 * Returns 0 on success and 1 on failure.  Note that the stored value is
 * updated before the EFER.SVME check, so that failure path leaves the
 * new value in place.
 */
static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int writable = SVM_VM_CR_VALID_MASK;
	int svm_dis;

	if (data & ~SVM_VM_CR_VALID_MASK)
		return 1;

	if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
		writable &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);

	svm->nested.vm_cr_msr = (svm->nested.vm_cr_msr & ~writable) |
				(data & writable);

	svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;

	/* check for svm_disable while efer.svme is set */
	return (svm_dis && (vcpu->arch.efer & EFER_SVME)) ? 1 : 0;
}
3140 
/*
 * Write @data to the MSR selected by @ecx on behalf of the guest.
 *
 * The MSRs listed here are stored in the VMCB or in SVM-private vcpu
 * state; everything else is delegated to kvm_set_msr_common().
 *
 * Returns 0 on success and non-zero when the write is rejected.
 */
static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_save_area *save = &svm->vmcb->save;

	switch (ecx) {
	case MSR_IA32_TSC:
		kvm_write_tsc(vcpu, data);
		return 0;
	case MSR_STAR:
		save->star = data;
		return 0;
#ifdef CONFIG_X86_64
	case MSR_LSTAR:
		save->lstar = data;
		return 0;
	case MSR_CSTAR:
		save->cstar = data;
		return 0;
	case MSR_KERNEL_GS_BASE:
		save->kernel_gs_base = data;
		return 0;
	case MSR_SYSCALL_MASK:
		save->sfmask = data;
		return 0;
#endif
	case MSR_IA32_SYSENTER_CS:
		save->sysenter_cs = data;
		return 0;
	case MSR_IA32_SYSENTER_EIP:
		/* kept both in vcpu_svm and in the VMCB */
		svm->sysenter_eip = data;
		save->sysenter_eip = data;
		return 0;
	case MSR_IA32_SYSENTER_ESP:
		/* kept both in vcpu_svm and in the VMCB */
		svm->sysenter_esp = data;
		save->sysenter_esp = data;
		return 0;
	case MSR_IA32_DEBUGCTLMSR:
		if (!boot_cpu_has(X86_FEATURE_LBRV)) {
			/* without LBRV the write is logged and dropped */
			pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
				  __func__, data);
			return 0;
		}
		if (data & DEBUGCTL_RESERVED_BITS)
			return 1;

		save->dbgctl = data;
		mark_dirty(svm->vmcb, VMCB_LBR);
		/* bit 0 of the written value selects LBR virtualization */
		if (data & (1ULL << 0))
			svm_enable_lbrv(svm);
		else
			svm_disable_lbrv(svm);
		return 0;
	case MSR_VM_HSAVE_PA:
		svm->nested.hsave_msr = data;
		return 0;
	case MSR_VM_CR:
		return svm_set_vm_cr(vcpu, data);
	case MSR_VM_IGNNE:
		pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
		return 0;
	default:
		return kvm_set_msr_common(vcpu, ecx, data);
	}
}
3206 
wrmsr_interception(struct vcpu_svm * svm)3207 static int wrmsr_interception(struct vcpu_svm *svm)
3208 {
3209 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3210 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
3211 		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
3212 
3213 
3214 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3215 	if (svm_set_msr(&svm->vcpu, ecx, data)) {
3216 		trace_kvm_msr_write_ex(ecx, data);
3217 		kvm_inject_gp(&svm->vcpu, 0);
3218 	} else {
3219 		trace_kvm_msr_write(ecx, data);
3220 		skip_emulated_instruction(&svm->vcpu);
3221 	}
3222 	return 1;
3223 }
3224 
msr_interception(struct vcpu_svm * svm)3225 static int msr_interception(struct vcpu_svm *svm)
3226 {
3227 	if (svm->vmcb->control.exit_info_1)
3228 		return wrmsr_interception(svm);
3229 	else
3230 		return rdmsr_interception(svm);
3231 }
3232 
interrupt_window_interception(struct vcpu_svm * svm)3233 static int interrupt_window_interception(struct vcpu_svm *svm)
3234 {
3235 	struct kvm_run *kvm_run = svm->vcpu.run;
3236 
3237 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3238 	svm_clear_vintr(svm);
3239 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3240 	mark_dirty(svm->vmcb, VMCB_INTR);
3241 	/*
3242 	 * If the user space waits to inject interrupts, exit as soon as
3243 	 * possible
3244 	 */
3245 	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
3246 	    kvm_run->request_interrupt_window &&
3247 	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
3248 		++svm->vcpu.stat.irq_window_exits;
3249 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
3250 		return 0;
3251 	}
3252 
3253 	return 1;
3254 }
3255 
pause_interception(struct vcpu_svm * svm)3256 static int pause_interception(struct vcpu_svm *svm)
3257 {
3258 	kvm_vcpu_on_spin(&(svm->vcpu));
3259 	return 1;
3260 }
3261 
/*
 * Exit handler dispatch table, indexed by the VMCB exit code.
 *
 * handle_exit() calls the entry that matches the current exit code.  An
 * exit code that is beyond the end of the table, or whose slot was left
 * NULL by the designated initializers, is reported to userspace as
 * KVM_EXIT_UNKNOWN.
 */
static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
	[SVM_EXIT_READ_CR0]			= cr_interception,
	[SVM_EXIT_READ_CR3]			= cr_interception,
	[SVM_EXIT_READ_CR4]			= cr_interception,
	[SVM_EXIT_READ_CR8]			= cr_interception,
	[SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception,
	[SVM_EXIT_WRITE_CR0]			= cr_interception,
	[SVM_EXIT_WRITE_CR3]			= cr_interception,
	[SVM_EXIT_WRITE_CR4]			= cr_interception,
	[SVM_EXIT_WRITE_CR8]			= cr8_write_interception,
	[SVM_EXIT_READ_DR0]			= dr_interception,
	[SVM_EXIT_READ_DR1]			= dr_interception,
	[SVM_EXIT_READ_DR2]			= dr_interception,
	[SVM_EXIT_READ_DR3]			= dr_interception,
	[SVM_EXIT_READ_DR4]			= dr_interception,
	[SVM_EXIT_READ_DR5]			= dr_interception,
	[SVM_EXIT_READ_DR6]			= dr_interception,
	[SVM_EXIT_READ_DR7]			= dr_interception,
	[SVM_EXIT_WRITE_DR0]			= dr_interception,
	[SVM_EXIT_WRITE_DR1]			= dr_interception,
	[SVM_EXIT_WRITE_DR2]			= dr_interception,
	[SVM_EXIT_WRITE_DR3]			= dr_interception,
	[SVM_EXIT_WRITE_DR4]			= dr_interception,
	[SVM_EXIT_WRITE_DR5]			= dr_interception,
	[SVM_EXIT_WRITE_DR6]			= dr_interception,
	[SVM_EXIT_WRITE_DR7]			= dr_interception,
	[SVM_EXIT_EXCP_BASE + DB_VECTOR]	= db_interception,
	[SVM_EXIT_EXCP_BASE + BP_VECTOR]	= bp_interception,
	[SVM_EXIT_EXCP_BASE + UD_VECTOR]	= ud_interception,
	[SVM_EXIT_EXCP_BASE + PF_VECTOR]	= pf_interception,
	[SVM_EXIT_EXCP_BASE + NM_VECTOR]	= nm_interception,
	[SVM_EXIT_EXCP_BASE + MC_VECTOR]	= mc_interception,
	[SVM_EXIT_INTR]				= intr_interception,
	[SVM_EXIT_NMI]				= nmi_interception,
	[SVM_EXIT_SMI]				= nop_on_interception,
	[SVM_EXIT_INIT]				= nop_on_interception,
	[SVM_EXIT_VINTR]			= interrupt_window_interception,
	[SVM_EXIT_RDPMC]			= rdpmc_interception,
	[SVM_EXIT_CPUID]			= cpuid_interception,
	[SVM_EXIT_IRET]                         = iret_interception,
	[SVM_EXIT_INVD]                         = emulate_on_interception,
	[SVM_EXIT_PAUSE]			= pause_interception,
	[SVM_EXIT_HLT]				= halt_interception,
	[SVM_EXIT_INVLPG]			= invlpg_interception,
	[SVM_EXIT_INVLPGA]			= invlpga_interception,
	[SVM_EXIT_IOIO]				= io_interception,
	[SVM_EXIT_MSR]				= msr_interception,
	[SVM_EXIT_TASK_SWITCH]			= task_switch_interception,
	[SVM_EXIT_SHUTDOWN]			= shutdown_interception,
	[SVM_EXIT_VMRUN]			= vmrun_interception,
	[SVM_EXIT_VMMCALL]			= vmmcall_interception,
	[SVM_EXIT_VMLOAD]			= vmload_interception,
	[SVM_EXIT_VMSAVE]			= vmsave_interception,
	[SVM_EXIT_STGI]				= stgi_interception,
	[SVM_EXIT_CLGI]				= clgi_interception,
	[SVM_EXIT_SKINIT]			= skinit_interception,
	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
	/* MONITOR and MWAIT are routed to invalid_op_interception */
	[SVM_EXIT_MONITOR]			= invalid_op_interception,
	[SVM_EXIT_MWAIT]			= invalid_op_interception,
	[SVM_EXIT_XSETBV]			= xsetbv_interception,
	/* nested page faults share the handler used for #PF intercepts */
	[SVM_EXIT_NPF]				= pf_interception,
};
3324 
dump_vmcb(struct kvm_vcpu * vcpu)3325 static void dump_vmcb(struct kvm_vcpu *vcpu)
3326 {
3327 	struct vcpu_svm *svm = to_svm(vcpu);
3328 	struct vmcb_control_area *control = &svm->vmcb->control;
3329 	struct vmcb_save_area *save = &svm->vmcb->save;
3330 
3331 	pr_err("VMCB Control Area:\n");
3332 	pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
3333 	pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
3334 	pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
3335 	pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
3336 	pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
3337 	pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
3338 	pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
3339 	pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
3340 	pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
3341 	pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
3342 	pr_err("%-20s%d\n", "asid:", control->asid);
3343 	pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
3344 	pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
3345 	pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
3346 	pr_err("%-20s%08x\n", "int_state:", control->int_state);
3347 	pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
3348 	pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
3349 	pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
3350 	pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
3351 	pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
3352 	pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
3353 	pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
3354 	pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
3355 	pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
3356 	pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
3357 	pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
3358 	pr_err("VMCB State Save Area:\n");
3359 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3360 	       "es:",
3361 	       save->es.selector, save->es.attrib,
3362 	       save->es.limit, save->es.base);
3363 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3364 	       "cs:",
3365 	       save->cs.selector, save->cs.attrib,
3366 	       save->cs.limit, save->cs.base);
3367 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3368 	       "ss:",
3369 	       save->ss.selector, save->ss.attrib,
3370 	       save->ss.limit, save->ss.base);
3371 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3372 	       "ds:",
3373 	       save->ds.selector, save->ds.attrib,
3374 	       save->ds.limit, save->ds.base);
3375 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3376 	       "fs:",
3377 	       save->fs.selector, save->fs.attrib,
3378 	       save->fs.limit, save->fs.base);
3379 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3380 	       "gs:",
3381 	       save->gs.selector, save->gs.attrib,
3382 	       save->gs.limit, save->gs.base);
3383 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3384 	       "gdtr:",
3385 	       save->gdtr.selector, save->gdtr.attrib,
3386 	       save->gdtr.limit, save->gdtr.base);
3387 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3388 	       "ldtr:",
3389 	       save->ldtr.selector, save->ldtr.attrib,
3390 	       save->ldtr.limit, save->ldtr.base);
3391 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3392 	       "idtr:",
3393 	       save->idtr.selector, save->idtr.attrib,
3394 	       save->idtr.limit, save->idtr.base);
3395 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3396 	       "tr:",
3397 	       save->tr.selector, save->tr.attrib,
3398 	       save->tr.limit, save->tr.base);
3399 	pr_err("cpl:            %d                efer:         %016llx\n",
3400 		save->cpl, save->efer);
3401 	pr_err("%-15s %016llx %-13s %016llx\n",
3402 	       "cr0:", save->cr0, "cr2:", save->cr2);
3403 	pr_err("%-15s %016llx %-13s %016llx\n",
3404 	       "cr3:", save->cr3, "cr4:", save->cr4);
3405 	pr_err("%-15s %016llx %-13s %016llx\n",
3406 	       "dr6:", save->dr6, "dr7:", save->dr7);
3407 	pr_err("%-15s %016llx %-13s %016llx\n",
3408 	       "rip:", save->rip, "rflags:", save->rflags);
3409 	pr_err("%-15s %016llx %-13s %016llx\n",
3410 	       "rsp:", save->rsp, "rax:", save->rax);
3411 	pr_err("%-15s %016llx %-13s %016llx\n",
3412 	       "star:", save->star, "lstar:", save->lstar);
3413 	pr_err("%-15s %016llx %-13s %016llx\n",
3414 	       "cstar:", save->cstar, "sfmask:", save->sfmask);
3415 	pr_err("%-15s %016llx %-13s %016llx\n",
3416 	       "kernel_gs_base:", save->kernel_gs_base,
3417 	       "sysenter_cs:", save->sysenter_cs);
3418 	pr_err("%-15s %016llx %-13s %016llx\n",
3419 	       "sysenter_esp:", save->sysenter_esp,
3420 	       "sysenter_eip:", save->sysenter_eip);
3421 	pr_err("%-15s %016llx %-13s %016llx\n",
3422 	       "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
3423 	pr_err("%-15s %016llx %-13s %016llx\n",
3424 	       "br_from:", save->br_from, "br_to:", save->br_to);
3425 	pr_err("%-15s %016llx %-13s %016llx\n",
3426 	       "excp_from:", save->last_excp_from,
3427 	       "excp_to:", save->last_excp_to);
3428 }
3429 
/*
 * Copy exit_info_1 and exit_info_2 from the vcpu's VMCB control area
 * into @info1 and @info2.
 */
static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	*info1 = svm->vmcb->control.exit_info_1;
	*info2 = svm->vmcb->control.exit_info_2;
}
3437 
handle_exit(struct kvm_vcpu * vcpu)3438 static int handle_exit(struct kvm_vcpu *vcpu)
3439 {
3440 	struct vcpu_svm *svm = to_svm(vcpu);
3441 	struct kvm_run *kvm_run = vcpu->run;
3442 	u32 exit_code = svm->vmcb->control.exit_code;
3443 
3444 	if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
3445 		vcpu->arch.cr0 = svm->vmcb->save.cr0;
3446 	if (npt_enabled)
3447 		vcpu->arch.cr3 = svm->vmcb->save.cr3;
3448 
3449 	if (unlikely(svm->nested.exit_required)) {
3450 		nested_svm_vmexit(svm);
3451 		svm->nested.exit_required = false;
3452 
3453 		return 1;
3454 	}
3455 
3456 	if (is_guest_mode(vcpu)) {
3457 		int vmexit;
3458 
3459 		trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
3460 					svm->vmcb->control.exit_info_1,
3461 					svm->vmcb->control.exit_info_2,
3462 					svm->vmcb->control.exit_int_info,
3463 					svm->vmcb->control.exit_int_info_err,
3464 					KVM_ISA_SVM);
3465 
3466 		vmexit = nested_svm_exit_special(svm);
3467 
3468 		if (vmexit == NESTED_EXIT_CONTINUE)
3469 			vmexit = nested_svm_exit_handled(svm);
3470 
3471 		if (vmexit == NESTED_EXIT_DONE)
3472 			return 1;
3473 	}
3474 
3475 	svm_complete_interrupts(svm);
3476 
3477 	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
3478 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3479 		kvm_run->fail_entry.hardware_entry_failure_reason
3480 			= svm->vmcb->control.exit_code;
3481 		pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
3482 		dump_vmcb(vcpu);
3483 		return 0;
3484 	}
3485 
3486 	if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
3487 	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
3488 	    exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
3489 	    exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
3490 		printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
3491 		       "exit_code 0x%x\n",
3492 		       __func__, svm->vmcb->control.exit_int_info,
3493 		       exit_code);
3494 
3495 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
3496 	    || !svm_exit_handlers[exit_code]) {
3497 		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
3498 		kvm_run->hw.hardware_exit_reason = exit_code;
3499 		return 0;
3500 	}
3501 
3502 	return svm_exit_handlers[exit_code](svm);
3503 }
3504 
reload_tss(struct kvm_vcpu * vcpu)3505 static void reload_tss(struct kvm_vcpu *vcpu)
3506 {
3507 	int cpu = raw_smp_processor_id();
3508 
3509 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3510 	sd->tss_desc->type = 9; /* available 32/64-bit TSS */
3511 	load_TR_desc();
3512 }
3513 
pre_svm_run(struct vcpu_svm * svm)3514 static void pre_svm_run(struct vcpu_svm *svm)
3515 {
3516 	int cpu = raw_smp_processor_id();
3517 
3518 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3519 
3520 	/* FIXME: handle wraparound of asid_generation */
3521 	if (svm->asid_generation != sd->asid_generation)
3522 		new_asid(svm, sd);
3523 }
3524 
svm_inject_nmi(struct kvm_vcpu * vcpu)3525 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
3526 {
3527 	struct vcpu_svm *svm = to_svm(vcpu);
3528 
3529 	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
3530 	vcpu->arch.hflags |= HF_NMI_MASK;
3531 	set_intercept(svm, INTERCEPT_IRET);
3532 	++vcpu->stat.nmi_injections;
3533 }
3534 
svm_inject_irq(struct vcpu_svm * svm,int irq)3535 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
3536 {
3537 	struct vmcb_control_area *control;
3538 
3539 	control = &svm->vmcb->control;
3540 	control->int_vector = irq;
3541 	control->int_ctl &= ~V_INTR_PRIO_MASK;
3542 	control->int_ctl |= V_IRQ_MASK |
3543 		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
3544 	mark_dirty(svm->vmcb, VMCB_INTR);
3545 }
3546 
svm_set_irq(struct kvm_vcpu * vcpu)3547 static void svm_set_irq(struct kvm_vcpu *vcpu)
3548 {
3549 	struct vcpu_svm *svm = to_svm(vcpu);
3550 
3551 	BUG_ON(!(gif_set(svm)));
3552 
3553 	trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
3554 	++vcpu->stat.irq_injections;
3555 
3556 	svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
3557 		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
3558 }
3559 
update_cr8_intercept(struct kvm_vcpu * vcpu,int tpr,int irr)3560 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3561 {
3562 	struct vcpu_svm *svm = to_svm(vcpu);
3563 
3564 	if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3565 		return;
3566 
3567 	clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3568 
3569 	if (irr == -1)
3570 		return;
3571 
3572 	if (tpr >= irr)
3573 		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3574 }
3575 
svm_nmi_allowed(struct kvm_vcpu * vcpu)3576 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3577 {
3578 	struct vcpu_svm *svm = to_svm(vcpu);
3579 	struct vmcb *vmcb = svm->vmcb;
3580 	int ret;
3581 	ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
3582 	      !(svm->vcpu.arch.hflags & HF_NMI_MASK);
3583 	ret = ret && gif_set(svm) && nested_svm_nmi(svm);
3584 
3585 	return ret;
3586 }
3587 
svm_get_nmi_mask(struct kvm_vcpu * vcpu)3588 static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3589 {
3590 	struct vcpu_svm *svm = to_svm(vcpu);
3591 
3592 	return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
3593 }
3594 
svm_set_nmi_mask(struct kvm_vcpu * vcpu,bool masked)3595 static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3596 {
3597 	struct vcpu_svm *svm = to_svm(vcpu);
3598 
3599 	if (masked) {
3600 		svm->vcpu.arch.hflags |= HF_NMI_MASK;
3601 		set_intercept(svm, INTERCEPT_IRET);
3602 	} else {
3603 		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
3604 		clr_intercept(svm, INTERCEPT_IRET);
3605 	}
3606 }
3607 
svm_interrupt_allowed(struct kvm_vcpu * vcpu)3608 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
3609 {
3610 	struct vcpu_svm *svm = to_svm(vcpu);
3611 	struct vmcb *vmcb = svm->vmcb;
3612 	int ret;
3613 
3614 	if (!gif_set(svm) ||
3615 	     (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
3616 		return 0;
3617 
3618 	ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
3619 
3620 	if (is_guest_mode(vcpu))
3621 		return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
3622 
3623 	return ret;
3624 }
3625 
/*
 * Arm the virtual interrupt intercept so that an exit is taken once the
 * guest can accept interrupts.
 */
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/*
	 * With GIF=0 the CPU cannot be relied on to report when GIF turns
	 * 1, since that is a separate STGI/VMRUN intercept.  This function
	 * is called again after that intercept, and the vintr intercept is
	 * armed then.
	 */
	if (!gif_set(svm) || !nested_svm_intr(svm))
		return;

	svm_set_vintr(svm);
	svm_inject_irq(svm, 0x0);
}
3641 
enable_nmi_window(struct kvm_vcpu * vcpu)3642 static void enable_nmi_window(struct kvm_vcpu *vcpu)
3643 {
3644 	struct vcpu_svm *svm = to_svm(vcpu);
3645 
3646 	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
3647 	    == HF_NMI_MASK)
3648 		return; /* IRET will cause a vm exit */
3649 
3650 	/*
3651 	 * Something prevents NMI from been injected. Single step over possible
3652 	 * problem (IRET or exception injection or interrupt shadow)
3653 	 */
3654 	svm->nmi_singlestep = true;
3655 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
3656 	update_db_intercept(vcpu);
3657 }
3658 
/*
 * Stub: both arguments are ignored and success (0) is always returned.
 */
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
	return 0;
}
3663 
svm_flush_tlb(struct kvm_vcpu * vcpu)3664 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
3665 {
3666 	struct vcpu_svm *svm = to_svm(vcpu);
3667 
3668 	if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
3669 		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
3670 	else
3671 		svm->asid_generation--;
3672 }
3673 
/* Intentionally empty: nothing is done here for SVM. */
static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
{
}
3677 
sync_cr8_to_lapic(struct kvm_vcpu * vcpu)3678 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3679 {
3680 	struct vcpu_svm *svm = to_svm(vcpu);
3681 
3682 	if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3683 		return;
3684 
3685 	if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
3686 		int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
3687 		kvm_set_cr8(vcpu, cr8);
3688 	}
3689 }
3690 
sync_lapic_to_cr8(struct kvm_vcpu * vcpu)3691 static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3692 {
3693 	struct vcpu_svm *svm = to_svm(vcpu);
3694 	u64 cr8;
3695 
3696 	if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3697 		return;
3698 
3699 	cr8 = kvm_get_cr8(vcpu);
3700 	svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
3701 	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
3702 }
3703 
svm_complete_interrupts(struct vcpu_svm * svm)3704 static void svm_complete_interrupts(struct vcpu_svm *svm)
3705 {
3706 	u8 vector;
3707 	int type;
3708 	u32 exitintinfo = svm->vmcb->control.exit_int_info;
3709 	unsigned int3_injected = svm->int3_injected;
3710 
3711 	svm->int3_injected = 0;
3712 
3713 	/*
3714 	 * If we've made progress since setting HF_IRET_MASK, we've
3715 	 * executed an IRET and can allow NMI injection.
3716 	 */
3717 	if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
3718 	    && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
3719 		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3720 		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3721 	}
3722 
3723 	svm->vcpu.arch.nmi_injected = false;
3724 	kvm_clear_exception_queue(&svm->vcpu);
3725 	kvm_clear_interrupt_queue(&svm->vcpu);
3726 
3727 	if (!(exitintinfo & SVM_EXITINTINFO_VALID))
3728 		return;
3729 
3730 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3731 
3732 	vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
3733 	type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
3734 
3735 	switch (type) {
3736 	case SVM_EXITINTINFO_TYPE_NMI:
3737 		svm->vcpu.arch.nmi_injected = true;
3738 		break;
3739 	case SVM_EXITINTINFO_TYPE_EXEPT:
3740 		/*
3741 		 * In case of software exceptions, do not reinject the vector,
3742 		 * but re-execute the instruction instead. Rewind RIP first
3743 		 * if we emulated INT3 before.
3744 		 */
3745 		if (kvm_exception_is_soft(vector)) {
3746 			if (vector == BP_VECTOR && int3_injected &&
3747 			    kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
3748 				kvm_rip_write(&svm->vcpu,
3749 					      kvm_rip_read(&svm->vcpu) -
3750 					      int3_injected);
3751 			break;
3752 		}
3753 		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
3754 			u32 err = svm->vmcb->control.exit_int_info_err;
3755 			kvm_requeue_exception_e(&svm->vcpu, vector, err);
3756 
3757 		} else
3758 			kvm_requeue_exception(&svm->vcpu, vector);
3759 		break;
3760 	case SVM_EXITINTINFO_TYPE_INTR:
3761 		kvm_queue_interrupt(&svm->vcpu, vector, false);
3762 		break;
3763 	default:
3764 		break;
3765 	}
3766 }
3767 
svm_cancel_injection(struct kvm_vcpu * vcpu)3768 static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3769 {
3770 	struct vcpu_svm *svm = to_svm(vcpu);
3771 	struct vmcb_control_area *control = &svm->vmcb->control;
3772 
3773 	control->exit_int_info = control->event_inj;
3774 	control->exit_int_info_err = control->event_inj_err;
3775 	control->event_inj = 0;
3776 	svm_complete_interrupts(svm);
3777 }
3778 
/*
 * Register-name prefix for the inline assembly in svm_vcpu_run():
 * "r" (rax, rbx, ...) on 64-bit builds, "e" (eax, ebx, ...) otherwise.
 */
#ifdef CONFIG_X86_64
#define R "r"
#else
#define R "e"
#endif

/*
 * Enter the guest and run it until the next exit.
 *
 * Guest RAX/RSP/RIP are copied into the VMCB save area, the remaining
 * guest general purpose registers are loaded from vcpu.arch.regs by the
 * inline assembly, and VMLOAD/VMRUN/VMSAVE are executed.  After the exit
 * the guest registers are saved back, host GS base (or fs/gs on 32-bit)
 * and the TSS are restored, and per-exit state (cr2, rax, rsp, rip,
 * next_rip, tlb_ctl, async PF reason, VMCB clean bits) is updated.
 *
 * Returns early, without entering the guest, when a nested vmexit
 * emulation is still pending.
 */
static void svm_vcpu_run(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* RAX, RSP and RIP go through the VMCB rather than the asm below */
	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
	svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];

	/*
	 * A vmexit emulation is required before the vcpu can be executed
	 * again.
	 */
	if (unlikely(svm->nested.exit_required))
		return;

	pre_svm_run(svm);

	sync_lapic_to_cr8(vcpu);

	svm->vmcb->save.cr2 = vcpu->arch.cr2;

	/* clgi() here is paired with the stgi() after the guest has exited */
	clgi();

	local_irq_enable();

	asm volatile (
		/* host frame pointer is saved by hand; bp is not in the clobber list */
		"push %%"R"bp; \n\t"
		"mov %c[rbx](%[svm]), %%"R"bx \n\t"
		"mov %c[rcx](%[svm]), %%"R"cx \n\t"
		"mov %c[rdx](%[svm]), %%"R"dx \n\t"
		"mov %c[rsi](%[svm]), %%"R"si \n\t"
		"mov %c[rdi](%[svm]), %%"R"di \n\t"
		"mov %c[rbp](%[svm]), %%"R"bp \n\t"
#ifdef CONFIG_X86_64
		"mov %c[r8](%[svm]),  %%r8  \n\t"
		"mov %c[r9](%[svm]),  %%r9  \n\t"
		"mov %c[r10](%[svm]), %%r10 \n\t"
		"mov %c[r11](%[svm]), %%r11 \n\t"
		"mov %c[r12](%[svm]), %%r12 \n\t"
		"mov %c[r13](%[svm]), %%r13 \n\t"
		"mov %c[r14](%[svm]), %%r14 \n\t"
		"mov %c[r15](%[svm]), %%r15 \n\t"
#endif

		/* Enter guest mode */
		/*
		 * rAX carries the svm pointer (the "a" constraint); it is
		 * kept on the stack while rAX is loaded with vmcb_pa for
		 * VMLOAD/VMRUN/VMSAVE, and popped back afterwards.
		 */
		"push %%"R"ax \n\t"
		"mov %c[vmcb](%[svm]), %%"R"ax \n\t"
		__ex(SVM_VMLOAD) "\n\t"
		__ex(SVM_VMRUN) "\n\t"
		__ex(SVM_VMSAVE) "\n\t"
		"pop %%"R"ax \n\t"

		/* Save guest registers, load host registers */
		"mov %%"R"bx, %c[rbx](%[svm]) \n\t"
		"mov %%"R"cx, %c[rcx](%[svm]) \n\t"
		"mov %%"R"dx, %c[rdx](%[svm]) \n\t"
		"mov %%"R"si, %c[rsi](%[svm]) \n\t"
		"mov %%"R"di, %c[rdi](%[svm]) \n\t"
		"mov %%"R"bp, %c[rbp](%[svm]) \n\t"
#ifdef CONFIG_X86_64
		"mov %%r8,  %c[r8](%[svm]) \n\t"
		"mov %%r9,  %c[r9](%[svm]) \n\t"
		"mov %%r10, %c[r10](%[svm]) \n\t"
		"mov %%r11, %c[r11](%[svm]) \n\t"
		"mov %%r12, %c[r12](%[svm]) \n\t"
		"mov %%r13, %c[r13](%[svm]) \n\t"
		"mov %%r14, %c[r14](%[svm]) \n\t"
		"mov %%r15, %c[r15](%[svm]) \n\t"
#endif
		"pop %%"R"bp"
		:
		/* inputs: svm pointer in rAX, plus struct offsets as immediates */
		: [svm]"a"(svm),
		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
		  [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
		  [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
		  [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
		  [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
		  [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
		  [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
#ifdef CONFIG_X86_64
		  , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
		  [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
		  [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
		  [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
		  [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
		  [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
		  [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
		  [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
#endif
		/* every register loaded with guest state above is clobbered */
		: "cc", "memory"
		, R"bx", R"cx", R"dx", R"si", R"di"
#ifdef CONFIG_X86_64
		, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
#endif
		);

	/* restore host state that was saved in svm->host */
#ifdef CONFIG_X86_64
	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
	loadsegment(fs, svm->host.fs);
#ifndef CONFIG_X86_32_LAZY_GS
	loadsegment(gs, svm->host.gs);
#endif
#endif

	reload_tss(vcpu);

	local_irq_disable();

	/* copy back the guest state that lives in the VMCB save area */
	vcpu->arch.cr2 = svm->vmcb->save.cr2;
	vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
	vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;

	trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);

	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
		kvm_before_handle_nmi(&svm->vcpu);

	stgi();

	/* Any pending NMI will happen here */

	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
		kvm_after_handle_nmi(&svm->vcpu);

	sync_cr8_to_lapic(vcpu);

	svm->next_rip = 0;

	/* reset any flush request so it does not carry over to the next entry */
	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;

	/* if exit due to PF check for async PF */
	if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
		svm->apf_reason = kvm_read_and_reset_pf_reason();

	/* with NPT the cached PDPTR state is marked neither available nor dirty */
	if (npt_enabled) {
		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
		vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
	}

	/*
	 * We need to handle MC intercepts here before the vcpu has a chance to
	 * change the physical cpu
	 */
	if (unlikely(svm->vmcb->control.exit_code ==
		     SVM_EXIT_EXCP_BASE + MC_VECTOR))
		svm_handle_mce(svm);

	mark_all_clean(svm->vmcb);
}

#undef R
3938 
3939 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3940 {
3941 	struct vcpu_svm *svm = to_svm(vcpu);
3942 
3943 	svm->vmcb->save.cr3 = root;
3944 	mark_dirty(svm->vmcb, VMCB_CR);
3945 	svm_flush_tlb(vcpu);
3946 }
3947 
3948 static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3949 {
3950 	struct vcpu_svm *svm = to_svm(vcpu);
3951 
3952 	svm->vmcb->control.nested_cr3 = root;
3953 	mark_dirty(svm->vmcb, VMCB_NPT);
3954 
3955 	/* Also sync guest cr3 here in case we live migrate */
3956 	svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
3957 	mark_dirty(svm->vmcb, VMCB_CR);
3958 
3959 	svm_flush_tlb(vcpu);
3960 }
3961 
3962 static int is_disabled(void)
3963 {
3964 	u64 vm_cr;
3965 
3966 	rdmsrl(MSR_VM_CR, vm_cr);
3967 	if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
3968 		return 1;
3969 
3970 	return 0;
3971 }
3972 
/*
 * Write the three opcode bytes of the VMMCALL instruction to @hypercall.
 * @vcpu is not used; @hypercall must have room for at least three bytes.
 */
static void
svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{
	/* Encoding of VMMCALL: 0f 01 d9 */
	static const unsigned char vmmcall[] = { 0x0f, 0x01, 0xd9 };
	unsigned int i;

	for (i = 0; i < sizeof(vmmcall); i++)
		hypercall[i] = vmmcall[i];
}
3983 
/*
 * Processor compatibility check: always stores 0 in the int that @rtn
 * points to.
 */
static void svm_check_processor_compat(void *rtn)
{
	int *result = rtn;

	*result = 0;
}
3988 
/* Accelerated-TPR query: unconditionally returns false. */
static bool svm_cpu_has_accelerated_tpr(void)
{
	return false;
}
3993 
3994 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3995 {
3996 	return 0;
3997 }
3998 
/* CPUID-update callback: intentionally empty, @vcpu is left untouched. */
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
}
4002 
4003 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
4004 {
4005 	switch (func) {
4006 	case 0x80000001:
4007 		if (nested)
4008 			entry->ecx |= (1 << 2); /* Set SVM bit */
4009 		break;
4010 	case 0x8000000A:
4011 		entry->eax = 1; /* SVM revision 1 */
4012 		entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
4013 				   ASID emulation to nested SVM */
4014 		entry->ecx = 0; /* Reserved */
4015 		entry->edx = 0; /* Per default do not support any
4016 				   additional features */
4017 
4018 		/* Support next_rip if host supports it */
4019 		if (boot_cpu_has(X86_FEATURE_NRIPS))
4020 			entry->edx |= SVM_FEATURE_NRIP;
4021 
4022 		/* Support NPT for the guest if enabled */
4023 		if (npt_enabled)
4024 			entry->edx |= SVM_FEATURE_NPT;
4025 
4026 		break;
4027 	}
4028 }
4029 
4030 static int svm_get_lpage_level(void)
4031 {
4032 	return PT_PDPE_LEVEL;
4033 }
4034 
/* RDTSCP support query: unconditionally returns false. */
static bool svm_rdtscp_supported(void)
{
	return false;
}
4039 
/* WBINVD-exit query: unconditionally returns true. */
static bool svm_has_wbinvd_exit(void)
{
	return true;
}
4044 
4045 static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
4046 {
4047 	struct vcpu_svm *svm = to_svm(vcpu);
4048 
4049 	set_exception_intercept(svm, NM_VECTOR);
4050 	update_cr0_intercept(svm);
4051 }
4052 
4053 #define PRE_EX(exit)  { .exit_code = (exit), \
4054 			.stage = X86_ICPT_PRE_EXCEPT, }
4055 #define POST_EX(exit) { .exit_code = (exit), \
4056 			.stage = X86_ICPT_POST_EXCEPT, }
4057 #define POST_MEM(exit) { .exit_code = (exit), \
4058 			.stage = X86_ICPT_POST_MEMACCESS, }
4059 
4060 static struct __x86_intercept {
4061 	u32 exit_code;
4062 	enum x86_intercept_stage stage;
4063 } x86_intercept_map[] = {
4064 	[x86_intercept_cr_read]		= POST_EX(SVM_EXIT_READ_CR0),
4065 	[x86_intercept_cr_write]	= POST_EX(SVM_EXIT_WRITE_CR0),
4066 	[x86_intercept_clts]		= POST_EX(SVM_EXIT_WRITE_CR0),
4067 	[x86_intercept_lmsw]		= POST_EX(SVM_EXIT_WRITE_CR0),
4068 	[x86_intercept_smsw]		= POST_EX(SVM_EXIT_READ_CR0),
4069 	[x86_intercept_dr_read]		= POST_EX(SVM_EXIT_READ_DR0),
4070 	[x86_intercept_dr_write]	= POST_EX(SVM_EXIT_WRITE_DR0),
4071 	[x86_intercept_sldt]		= POST_EX(SVM_EXIT_LDTR_READ),
4072 	[x86_intercept_str]		= POST_EX(SVM_EXIT_TR_READ),
4073 	[x86_intercept_lldt]		= POST_EX(SVM_EXIT_LDTR_WRITE),
4074 	[x86_intercept_ltr]		= POST_EX(SVM_EXIT_TR_WRITE),
4075 	[x86_intercept_sgdt]		= POST_EX(SVM_EXIT_GDTR_READ),
4076 	[x86_intercept_sidt]		= POST_EX(SVM_EXIT_IDTR_READ),
4077 	[x86_intercept_lgdt]		= POST_EX(SVM_EXIT_GDTR_WRITE),
4078 	[x86_intercept_lidt]		= POST_EX(SVM_EXIT_IDTR_WRITE),
4079 	[x86_intercept_vmrun]		= POST_EX(SVM_EXIT_VMRUN),
4080 	[x86_intercept_vmmcall]		= POST_EX(SVM_EXIT_VMMCALL),
4081 	[x86_intercept_vmload]		= POST_EX(SVM_EXIT_VMLOAD),
4082 	[x86_intercept_vmsave]		= POST_EX(SVM_EXIT_VMSAVE),
4083 	[x86_intercept_stgi]		= POST_EX(SVM_EXIT_STGI),
4084 	[x86_intercept_clgi]		= POST_EX(SVM_EXIT_CLGI),
4085 	[x86_intercept_skinit]		= POST_EX(SVM_EXIT_SKINIT),
4086 	[x86_intercept_invlpga]		= POST_EX(SVM_EXIT_INVLPGA),
4087 	[x86_intercept_rdtscp]		= POST_EX(SVM_EXIT_RDTSCP),
4088 	[x86_intercept_monitor]		= POST_MEM(SVM_EXIT_MONITOR),
4089 	[x86_intercept_mwait]		= POST_EX(SVM_EXIT_MWAIT),
4090 	[x86_intercept_invlpg]		= POST_EX(SVM_EXIT_INVLPG),
4091 	[x86_intercept_invd]		= POST_EX(SVM_EXIT_INVD),
4092 	[x86_intercept_wbinvd]		= POST_EX(SVM_EXIT_WBINVD),
4093 	[x86_intercept_wrmsr]		= POST_EX(SVM_EXIT_MSR),
4094 	[x86_intercept_rdtsc]		= POST_EX(SVM_EXIT_RDTSC),
4095 	[x86_intercept_rdmsr]		= POST_EX(SVM_EXIT_MSR),
4096 	[x86_intercept_rdpmc]		= POST_EX(SVM_EXIT_RDPMC),
4097 	[x86_intercept_cpuid]		= PRE_EX(SVM_EXIT_CPUID),
4098 	[x86_intercept_rsm]		= PRE_EX(SVM_EXIT_RSM),
4099 	[x86_intercept_pause]		= PRE_EX(SVM_EXIT_PAUSE),
4100 	[x86_intercept_pushf]		= PRE_EX(SVM_EXIT_PUSHF),
4101 	[x86_intercept_popf]		= PRE_EX(SVM_EXIT_POPF),
4102 	[x86_intercept_intn]		= PRE_EX(SVM_EXIT_SWINT),
4103 	[x86_intercept_iret]		= PRE_EX(SVM_EXIT_IRET),
4104 	[x86_intercept_icebp]		= PRE_EX(SVM_EXIT_ICEBP),
4105 	[x86_intercept_hlt]		= POST_EX(SVM_EXIT_HLT),
4106 	[x86_intercept_in]		= POST_EX(SVM_EXIT_IOIO),
4107 	[x86_intercept_ins]		= POST_EX(SVM_EXIT_IOIO),
4108 	[x86_intercept_out]		= POST_EX(SVM_EXIT_IOIO),
4109 	[x86_intercept_outs]		= POST_EX(SVM_EXIT_IOIO),
4110 };
4111 
4112 #undef PRE_EX
4113 #undef POST_EX
4114 #undef POST_MEM
4115 
4116 static int svm_check_intercept(struct kvm_vcpu *vcpu,
4117 			       struct x86_instruction_info *info,
4118 			       enum x86_intercept_stage stage)
4119 {
4120 	struct vcpu_svm *svm = to_svm(vcpu);
4121 	int vmexit, ret = X86EMUL_CONTINUE;
4122 	struct __x86_intercept icpt_info;
4123 	struct vmcb *vmcb = svm->vmcb;
4124 
4125 	if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
4126 		goto out;
4127 
4128 	icpt_info = x86_intercept_map[info->intercept];
4129 
4130 	if (stage != icpt_info.stage)
4131 		goto out;
4132 
4133 	switch (icpt_info.exit_code) {
4134 	case SVM_EXIT_READ_CR0:
4135 		if (info->intercept == x86_intercept_cr_read)
4136 			icpt_info.exit_code += info->modrm_reg;
4137 		break;
4138 	case SVM_EXIT_WRITE_CR0: {
4139 		unsigned long cr0, val;
4140 		u64 intercept;
4141 
4142 		if (info->intercept == x86_intercept_cr_write)
4143 			icpt_info.exit_code += info->modrm_reg;
4144 
4145 		if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
4146 			break;
4147 
4148 		intercept = svm->nested.intercept;
4149 
4150 		if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
4151 			break;
4152 
4153 		cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
4154 		val = info->src_val  & ~SVM_CR0_SELECTIVE_MASK;
4155 
4156 		if (info->intercept == x86_intercept_lmsw) {
4157 			cr0 &= 0xfUL;
4158 			val &= 0xfUL;
4159 			/* lmsw can't clear PE - catch this here */
4160 			if (cr0 & X86_CR0_PE)
4161 				val |= X86_CR0_PE;
4162 		}
4163 
4164 		if (cr0 ^ val)
4165 			icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
4166 
4167 		break;
4168 	}
4169 	case SVM_EXIT_READ_DR0:
4170 	case SVM_EXIT_WRITE_DR0:
4171 		icpt_info.exit_code += info->modrm_reg;
4172 		break;
4173 	case SVM_EXIT_MSR:
4174 		if (info->intercept == x86_intercept_wrmsr)
4175 			vmcb->control.exit_info_1 = 1;
4176 		else
4177 			vmcb->control.exit_info_1 = 0;
4178 		break;
4179 	case SVM_EXIT_PAUSE:
4180 		/*
4181 		 * We get this for NOP only, but pause
4182 		 * is rep not, check this here
4183 		 */
4184 		if (info->rep_prefix != REPE_PREFIX)
4185 			goto out;
4186 	case SVM_EXIT_IOIO: {
4187 		u64 exit_info;
4188 		u32 bytes;
4189 
4190 		exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
4191 
4192 		if (info->intercept == x86_intercept_in ||
4193 		    info->intercept == x86_intercept_ins) {
4194 			exit_info |= SVM_IOIO_TYPE_MASK;
4195 			bytes = info->src_bytes;
4196 		} else {
4197 			bytes = info->dst_bytes;
4198 		}
4199 
4200 		if (info->intercept == x86_intercept_outs ||
4201 		    info->intercept == x86_intercept_ins)
4202 			exit_info |= SVM_IOIO_STR_MASK;
4203 
4204 		if (info->rep_prefix)
4205 			exit_info |= SVM_IOIO_REP_MASK;
4206 
4207 		bytes = min(bytes, 4u);
4208 
4209 		exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
4210 
4211 		exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
4212 
4213 		vmcb->control.exit_info_1 = exit_info;
4214 		vmcb->control.exit_info_2 = info->next_rip;
4215 
4216 		break;
4217 	}
4218 	default:
4219 		break;
4220 	}
4221 
4222 	vmcb->control.next_rip  = info->next_rip;
4223 	vmcb->control.exit_code = icpt_info.exit_code;
4224 	vmexit = nested_svm_exit_handled(svm);
4225 
4226 	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
4227 					   : X86EMUL_CONTINUE;
4228 
4229 out:
4230 	return ret;
4231 }
4232 
4233 static struct kvm_x86_ops svm_x86_ops = {
4234 	.cpu_has_kvm_support = has_svm,
4235 	.disabled_by_bios = is_disabled,
4236 	.hardware_setup = svm_hardware_setup,
4237 	.hardware_unsetup = svm_hardware_unsetup,
4238 	.check_processor_compatibility = svm_check_processor_compat,
4239 	.hardware_enable = svm_hardware_enable,
4240 	.hardware_disable = svm_hardware_disable,
4241 	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
4242 
4243 	.vcpu_create = svm_create_vcpu,
4244 	.vcpu_free = svm_free_vcpu,
4245 	.vcpu_reset = svm_vcpu_reset,
4246 
4247 	.prepare_guest_switch = svm_prepare_guest_switch,
4248 	.vcpu_load = svm_vcpu_load,
4249 	.vcpu_put = svm_vcpu_put,
4250 
4251 	.set_guest_debug = svm_guest_debug,
4252 	.get_msr = svm_get_msr,
4253 	.set_msr = svm_set_msr,
4254 	.get_segment_base = svm_get_segment_base,
4255 	.get_segment = svm_get_segment,
4256 	.set_segment = svm_set_segment,
4257 	.get_cpl = svm_get_cpl,
4258 	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
4259 	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
4260 	.decache_cr3 = svm_decache_cr3,
4261 	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
4262 	.set_cr0 = svm_set_cr0,
4263 	.set_cr3 = svm_set_cr3,
4264 	.set_cr4 = svm_set_cr4,
4265 	.set_efer = svm_set_efer,
4266 	.get_idt = svm_get_idt,
4267 	.set_idt = svm_set_idt,
4268 	.get_gdt = svm_get_gdt,
4269 	.set_gdt = svm_set_gdt,
4270 	.set_dr7 = svm_set_dr7,
4271 	.cache_reg = svm_cache_reg,
4272 	.get_rflags = svm_get_rflags,
4273 	.set_rflags = svm_set_rflags,
4274 	.fpu_activate = svm_fpu_activate,
4275 	.fpu_deactivate = svm_fpu_deactivate,
4276 
4277 	.tlb_flush = svm_flush_tlb,
4278 
4279 	.run = svm_vcpu_run,
4280 	.handle_exit = handle_exit,
4281 	.skip_emulated_instruction = skip_emulated_instruction,
4282 	.set_interrupt_shadow = svm_set_interrupt_shadow,
4283 	.get_interrupt_shadow = svm_get_interrupt_shadow,
4284 	.patch_hypercall = svm_patch_hypercall,
4285 	.set_irq = svm_set_irq,
4286 	.set_nmi = svm_inject_nmi,
4287 	.queue_exception = svm_queue_exception,
4288 	.cancel_injection = svm_cancel_injection,
4289 	.interrupt_allowed = svm_interrupt_allowed,
4290 	.nmi_allowed = svm_nmi_allowed,
4291 	.get_nmi_mask = svm_get_nmi_mask,
4292 	.set_nmi_mask = svm_set_nmi_mask,
4293 	.enable_nmi_window = enable_nmi_window,
4294 	.enable_irq_window = enable_irq_window,
4295 	.update_cr8_intercept = update_cr8_intercept,
4296 
4297 	.set_tss_addr = svm_set_tss_addr,
4298 	.get_tdp_level = get_npt_level,
4299 	.get_mt_mask = svm_get_mt_mask,
4300 
4301 	.get_exit_info = svm_get_exit_info,
4302 
4303 	.get_lpage_level = svm_get_lpage_level,
4304 
4305 	.cpuid_update = svm_cpuid_update,
4306 
4307 	.rdtscp_supported = svm_rdtscp_supported,
4308 
4309 	.set_supported_cpuid = svm_set_supported_cpuid,
4310 
4311 	.has_wbinvd_exit = svm_has_wbinvd_exit,
4312 
4313 	.set_tsc_khz = svm_set_tsc_khz,
4314 	.write_tsc_offset = svm_write_tsc_offset,
4315 	.adjust_tsc_offset = svm_adjust_tsc_offset,
4316 	.compute_tsc_offset = svm_compute_tsc_offset,
4317 	.read_l1_tsc = svm_read_l1_tsc,
4318 
4319 	.set_tdp_cr3 = set_tdp_cr3,
4320 
4321 	.check_intercept = svm_check_intercept,
4322 };
4323 
4324 static int __init svm_init(void)
4325 {
4326 	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
4327 			__alignof__(struct vcpu_svm), THIS_MODULE);
4328 }
4329 
4330 static void __exit svm_exit(void)
4331 {
4332 	kvm_exit();
4333 }
4334 
/* Register svm_init()/svm_exit() as the module's init and exit routines */
module_init(svm_init)
module_exit(svm_exit)
4337