1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Kernel-based Virtual Machine driver for Linux
4  *
5  * AMD SVM support
6  *
7  * Copyright (C) 2006 Qumranet, Inc.
8  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
9  *
10  * Authors:
11  *   Yaniv Kamay  <yaniv@qumranet.com>
12  *   Avi Kivity   <avi@qumranet.com>
13  */
14 
15 #define pr_fmt(fmt) "SVM: " fmt
16 
17 #include <linux/kvm_types.h>
18 #include <linux/hashtable.h>
19 #include <linux/amd-iommu.h>
20 #include <linux/kvm_host.h>
21 
22 #include <asm/irq_remapping.h>
23 
24 #include "trace.h"
25 #include "lapic.h"
26 #include "x86.h"
27 #include "irq.h"
28 #include "svm.h"
29 
/*
 * AVIC GATAG is encoded using VM and VCPU IDs: the VM ID sits above the
 * low AVIC_VCPU_ID_BITS bits, which hold the vCPU ID.
 */
#define AVIC_VCPU_ID_BITS		8
#define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS			24
#define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)

/*
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "a | b") are evaluated as a unit before masking/shifting.
 */
#define AVIC_GATAG(x, y)		((((x) & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
						((y) & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)		(((x) >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)		((x) & AVIC_VCPU_ID_MASK)
42 
43 static bool force_avic;
44 module_param_unsafe(force_avic, bool, 0444);
45 
46 /* Note:
47  * This hash table is used to map VM_ID to a struct kvm_svm,
48  * when handling AMD IOMMU GALOG notification to schedule in
49  * a particular vCPU.
50  */
51 #define SVM_VM_DATA_HASH_BITS	8
52 static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
53 static u32 next_vm_id = 0;
54 static bool next_vm_id_wrapped = 0;
55 static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
56 enum avic_modes avic_mode;
57 
58 /*
59  * This is a wrapper of struct amd_iommu_ir_data.
60  */
61 struct amd_svm_iommu_ir {
62 	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
63 	void *data;		/* Storing pointer to struct amd_ir_data */
64 };
65 
avic_activate_vmcb(struct vcpu_svm * svm)66 static void avic_activate_vmcb(struct vcpu_svm *svm)
67 {
68 	struct vmcb *vmcb = svm->vmcb01.ptr;
69 
70 	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
71 	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
72 
73 	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
74 
75 	/* Note:
76 	 * KVM can support hybrid-AVIC mode, where KVM emulates x2APIC
77 	 * MSR accesses, while interrupt injection to a running vCPU
78 	 * can be achieved using AVIC doorbell. The AVIC hardware still
79 	 * accelerate MMIO accesses, but this does not cause any harm
80 	 * as the guest is not supposed to access xAPIC mmio when uses x2APIC.
81 	 */
82 	if (apic_x2apic_mode(svm->vcpu.arch.apic) &&
83 	    avic_mode == AVIC_MODE_X2) {
84 		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
85 		vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
86 		/* Disabling MSR intercept for x2APIC registers */
87 		svm_set_x2apic_msr_interception(svm, false);
88 	} else {
89 		/* For xAVIC and hybrid-xAVIC modes */
90 		vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
91 		/* Enabling MSR intercept for x2APIC registers */
92 		svm_set_x2apic_msr_interception(svm, true);
93 	}
94 }
95 
avic_deactivate_vmcb(struct vcpu_svm * svm)96 static void avic_deactivate_vmcb(struct vcpu_svm *svm)
97 {
98 	struct vmcb *vmcb = svm->vmcb01.ptr;
99 
100 	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
101 	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
102 
103 	/*
104 	 * If running nested and the guest uses its own MSR bitmap, there
105 	 * is no need to update L0's msr bitmap
106 	 */
107 	if (is_guest_mode(&svm->vcpu) &&
108 	    vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))
109 		return;
110 
111 	/* Enabling MSR intercept for x2APIC registers */
112 	svm_set_x2apic_msr_interception(svm, true);
113 }
114 
115 /* Note:
116  * This function is called from IOMMU driver to notify
117  * SVM to schedule in a particular vCPU of a particular VM.
118  */
avic_ga_log_notifier(u32 ga_tag)119 int avic_ga_log_notifier(u32 ga_tag)
120 {
121 	unsigned long flags;
122 	struct kvm_svm *kvm_svm;
123 	struct kvm_vcpu *vcpu = NULL;
124 	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
125 	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
126 
127 	pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
128 	trace_kvm_avic_ga_log(vm_id, vcpu_id);
129 
130 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
131 	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
132 		if (kvm_svm->avic_vm_id != vm_id)
133 			continue;
134 		vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
135 		break;
136 	}
137 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
138 
139 	/* Note:
140 	 * At this point, the IOMMU should have already set the pending
141 	 * bit in the vAPIC backing page. So, we just need to schedule
142 	 * in the vcpu.
143 	 */
144 	if (vcpu)
145 		kvm_vcpu_wake_up(vcpu);
146 
147 	return 0;
148 }
149 
avic_vm_destroy(struct kvm * kvm)150 void avic_vm_destroy(struct kvm *kvm)
151 {
152 	unsigned long flags;
153 	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
154 
155 	if (!enable_apicv)
156 		return;
157 
158 	if (kvm_svm->avic_logical_id_table_page)
159 		__free_page(kvm_svm->avic_logical_id_table_page);
160 	if (kvm_svm->avic_physical_id_table_page)
161 		__free_page(kvm_svm->avic_physical_id_table_page);
162 
163 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
164 	hash_del(&kvm_svm->hnode);
165 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
166 }
167 
avic_vm_init(struct kvm * kvm)168 int avic_vm_init(struct kvm *kvm)
169 {
170 	unsigned long flags;
171 	int err = -ENOMEM;
172 	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
173 	struct kvm_svm *k2;
174 	struct page *p_page;
175 	struct page *l_page;
176 	u32 vm_id;
177 
178 	if (!enable_apicv)
179 		return 0;
180 
181 	/* Allocating physical APIC ID table (4KB) */
182 	p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
183 	if (!p_page)
184 		goto free_avic;
185 
186 	kvm_svm->avic_physical_id_table_page = p_page;
187 
188 	/* Allocating logical APIC ID table (4KB) */
189 	l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
190 	if (!l_page)
191 		goto free_avic;
192 
193 	kvm_svm->avic_logical_id_table_page = l_page;
194 
195 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
196  again:
197 	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
198 	if (vm_id == 0) { /* id is 1-based, zero is not okay */
199 		next_vm_id_wrapped = 1;
200 		goto again;
201 	}
202 	/* Is it still in use? Only possible if wrapped at least once */
203 	if (next_vm_id_wrapped) {
204 		hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
205 			if (k2->avic_vm_id == vm_id)
206 				goto again;
207 		}
208 	}
209 	kvm_svm->avic_vm_id = vm_id;
210 	hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
211 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
212 
213 	return 0;
214 
215 free_avic:
216 	avic_vm_destroy(kvm);
217 	return err;
218 }
219 
avic_init_vmcb(struct vcpu_svm * svm,struct vmcb * vmcb)220 void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
221 {
222 	struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
223 	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
224 	phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
225 	phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
226 
227 	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
228 	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
229 	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
230 	vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK;
231 
232 	if (kvm_apicv_activated(svm->vcpu.kvm))
233 		avic_activate_vmcb(svm);
234 	else
235 		avic_deactivate_vmcb(svm);
236 }
237 
avic_get_physical_id_entry(struct kvm_vcpu * vcpu,unsigned int index)238 static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
239 				       unsigned int index)
240 {
241 	u64 *avic_physical_id_table;
242 	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
243 
244 	if ((avic_mode == AVIC_MODE_X1 && index > AVIC_MAX_PHYSICAL_ID) ||
245 	    (avic_mode == AVIC_MODE_X2 && index > X2AVIC_MAX_PHYSICAL_ID))
246 		return NULL;
247 
248 	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
249 
250 	return &avic_physical_id_table[index];
251 }
252 
253 /*
254  * Note:
255  * AVIC hardware walks the nested page table to check permissions,
256  * but does not use the SPA address specified in the leaf page
257  * table entry since it uses  address in the AVIC_BACKING_PAGE pointer
258  * field of the VMCB. Therefore, we set up the
259  * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
260  */
avic_alloc_access_page(struct kvm * kvm)261 static int avic_alloc_access_page(struct kvm *kvm)
262 {
263 	void __user *ret;
264 	int r = 0;
265 
266 	mutex_lock(&kvm->slots_lock);
267 
268 	if (kvm->arch.apic_access_memslot_enabled)
269 		goto out;
270 
271 	ret = __x86_set_memory_region(kvm,
272 				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
273 				      APIC_DEFAULT_PHYS_BASE,
274 				      PAGE_SIZE);
275 	if (IS_ERR(ret)) {
276 		r = PTR_ERR(ret);
277 		goto out;
278 	}
279 
280 	kvm->arch.apic_access_memslot_enabled = true;
281 out:
282 	mutex_unlock(&kvm->slots_lock);
283 	return r;
284 }
285 
avic_init_backing_page(struct kvm_vcpu * vcpu)286 static int avic_init_backing_page(struct kvm_vcpu *vcpu)
287 {
288 	u64 *entry, new_entry;
289 	int id = vcpu->vcpu_id;
290 	struct vcpu_svm *svm = to_svm(vcpu);
291 
292 	if ((avic_mode == AVIC_MODE_X1 && id > AVIC_MAX_PHYSICAL_ID) ||
293 	    (avic_mode == AVIC_MODE_X2 && id > X2AVIC_MAX_PHYSICAL_ID))
294 		return -EINVAL;
295 
296 	if (!vcpu->arch.apic->regs)
297 		return -EINVAL;
298 
299 	if (kvm_apicv_activated(vcpu->kvm)) {
300 		int ret;
301 
302 		ret = avic_alloc_access_page(vcpu->kvm);
303 		if (ret)
304 			return ret;
305 	}
306 
307 	svm->avic_backing_page = virt_to_page(vcpu->arch.apic->regs);
308 
309 	/* Setting AVIC backing page address in the phy APIC ID table */
310 	entry = avic_get_physical_id_entry(vcpu, id);
311 	if (!entry)
312 		return -EINVAL;
313 
314 	new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
315 			      AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
316 			      AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
317 	WRITE_ONCE(*entry, new_entry);
318 
319 	svm->avic_physical_id_cache = entry;
320 
321 	return 0;
322 }
323 
avic_ring_doorbell(struct kvm_vcpu * vcpu)324 void avic_ring_doorbell(struct kvm_vcpu *vcpu)
325 {
326 	/*
327 	 * Note, the vCPU could get migrated to a different pCPU at any point,
328 	 * which could result in signalling the wrong/previous pCPU.  But if
329 	 * that happens the vCPU is guaranteed to do a VMRUN (after being
330 	 * migrated) and thus will process pending interrupts, i.e. a doorbell
331 	 * is not needed (and the spurious one is harmless).
332 	 */
333 	int cpu = READ_ONCE(vcpu->cpu);
334 
335 	if (cpu != get_cpu()) {
336 		wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
337 		trace_kvm_avic_doorbell(vcpu->vcpu_id, kvm_cpu_get_apicid(cpu));
338 	}
339 	put_cpu();
340 }
341 
342 /*
343  * A fast-path version of avic_kick_target_vcpus(), which attempts to match
344  * destination APIC ID to vCPU without looping through all vCPUs.
345  */
avic_kick_target_vcpus_fast(struct kvm * kvm,struct kvm_lapic * source,u32 icrl,u32 icrh,u32 index)346 static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
347 				       u32 icrl, u32 icrh, u32 index)
348 {
349 	u32 l1_physical_id, dest;
350 	struct kvm_vcpu *target_vcpu;
351 	int dest_mode = icrl & APIC_DEST_MASK;
352 	int shorthand = icrl & APIC_SHORT_MASK;
353 	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
354 
355 	if (shorthand != APIC_DEST_NOSHORT)
356 		return -EINVAL;
357 
358 	if (apic_x2apic_mode(source))
359 		dest = icrh;
360 	else
361 		dest = GET_XAPIC_DEST_FIELD(icrh);
362 
363 	if (dest_mode == APIC_DEST_PHYSICAL) {
364 		/* broadcast destination, use slow path */
365 		if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST)
366 			return -EINVAL;
367 		if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
368 			return -EINVAL;
369 
370 		l1_physical_id = dest;
371 
372 		if (WARN_ON_ONCE(l1_physical_id != index))
373 			return -EINVAL;
374 
375 	} else {
376 		u32 bitmap, cluster;
377 		int logid_index;
378 
379 		if (apic_x2apic_mode(source)) {
380 			/* 16 bit dest mask, 16 bit cluster id */
381 			bitmap = dest & 0xFFFF0000;
382 			cluster = (dest >> 16) << 4;
383 		} else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
384 			/* 8 bit dest mask*/
385 			bitmap = dest;
386 			cluster = 0;
387 		} else {
388 			/* 4 bit desk mask, 4 bit cluster id */
389 			bitmap = dest & 0xF;
390 			cluster = (dest >> 4) << 2;
391 		}
392 
393 		if (unlikely(!bitmap))
394 			/* guest bug: nobody to send the logical interrupt to */
395 			return 0;
396 
397 		if (!is_power_of_2(bitmap))
398 			/* multiple logical destinations, use slow path */
399 			return -EINVAL;
400 
401 		logid_index = cluster + __ffs(bitmap);
402 
403 		if (!apic_x2apic_mode(source)) {
404 			u32 *avic_logical_id_table =
405 				page_address(kvm_svm->avic_logical_id_table_page);
406 
407 			u32 logid_entry = avic_logical_id_table[logid_index];
408 
409 			if (WARN_ON_ONCE(index != logid_index))
410 				return -EINVAL;
411 
412 			/* guest bug: non existing/reserved logical destination */
413 			if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
414 				return 0;
415 
416 			l1_physical_id = logid_entry &
417 					 AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
418 		} else {
419 			/*
420 			 * For x2APIC logical mode, cannot leverage the index.
421 			 * Instead, calculate physical ID from logical ID in ICRH.
422 			 */
423 			int cluster = (icrh & 0xffff0000) >> 16;
424 			int apic = ffs(icrh & 0xffff) - 1;
425 
426 			/*
427 			 * If the x2APIC logical ID sub-field (i.e. icrh[15:0])
428 			 * contains anything but a single bit, we cannot use the
429 			 * fast path, because it is limited to a single vCPU.
430 			 */
431 			if (apic < 0 || icrh != (1 << apic))
432 				return -EINVAL;
433 
434 			l1_physical_id = (cluster << 4) + apic;
435 		}
436 	}
437 
438 	target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
439 	if (unlikely(!target_vcpu))
440 		/* guest bug: non existing vCPU is a target of this IPI*/
441 		return 0;
442 
443 	target_vcpu->arch.apic->irr_pending = true;
444 	svm_complete_interrupt_delivery(target_vcpu,
445 					icrl & APIC_MODE_MASK,
446 					icrl & APIC_INT_LEVELTRIG,
447 					icrl & APIC_VECTOR_MASK);
448 	return 0;
449 }
450 
/*
 * Kick the vCPUs targeted by an IPI that hardware could not fully deliver.
 *
 * The fast path is tried first; if it reports that it cannot handle the
 * IPI, every vCPU of the VM is checked against the destination.
 *
 * @kvm:    VM the IPI was sent in
 * @source: local APIC of the sending vCPU
 * @icrl:   low 32 bits of the ICR
 * @icrh:   high 32 bits of the ICR
 * @index:  table index reported by hardware for the failed IPI
 */
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
				   u32 icrl, u32 icrh, u32 index)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;
	u32 dest;

	if (!avic_kick_target_vcpus_fast(kvm, source, icrl, icrh, index))
		return;

	trace_kvm_avic_kick_vcpu_slowpath(icrh, icrl, index);

	/*
	 * The destination is encoded by the sender, so decode it according to
	 * the sender's x2APIC status, not each potential target's.  This also
	 * matches how the fast path interprets ICRH.
	 */
	if (apic_x2apic_mode(source))
		dest = icrh;
	else
		dest = GET_XAPIC_DEST_FIELD(icrh);

	/*
	 * Wake any target vCPUs that are blocking, i.e. waiting for a wake
	 * event.  There's no need to signal doorbells, as hardware has handled
	 * vCPUs that were in guest at the time of the IPI, and vCPUs that have
	 * since entered the guest will have processed pending IRQs at VMRUN.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
					dest, icrl & APIC_DEST_MASK)) {
			vcpu->arch.apic->irr_pending = true;
			svm_complete_interrupt_delivery(vcpu,
							icrl & APIC_MODE_MASK,
							icrl & APIC_INT_LEVELTRIG,
							icrl & APIC_VECTOR_MASK);
		}
	}
}
486 
avic_incomplete_ipi_interception(struct kvm_vcpu * vcpu)487 int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
488 {
489 	struct vcpu_svm *svm = to_svm(vcpu);
490 	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
491 	u32 icrl = svm->vmcb->control.exit_info_1;
492 	u32 id = svm->vmcb->control.exit_info_2 >> 32;
493 	u32 index = svm->vmcb->control.exit_info_2 & 0x1FF;
494 	struct kvm_lapic *apic = vcpu->arch.apic;
495 
496 	trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
497 
498 	switch (id) {
499 	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
500 		/*
501 		 * Emulate IPIs that are not handled by AVIC hardware, which
502 		 * only virtualizes Fixed, Edge-Triggered INTRs.  The exit is
503 		 * a trap, e.g. ICR holds the correct value and RIP has been
504 		 * advanced, KVM is responsible only for emulating the IPI.
505 		 * Sadly, hardware may sometimes leave the BUSY flag set, in
506 		 * which case KVM needs to emulate the ICR write as well in
507 		 * order to clear the BUSY flag.
508 		 */
509 		if (icrl & APIC_ICR_BUSY)
510 			kvm_apic_write_nodecode(vcpu, APIC_ICR);
511 		else
512 			kvm_apic_send_ipi(apic, icrl, icrh);
513 		break;
514 	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
515 		/*
516 		 * At this point, we expect that the AVIC HW has already
517 		 * set the appropriate IRR bits on the valid target
518 		 * vcpus. So, we just need to kick the appropriate vcpu.
519 		 */
520 		avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
521 		break;
522 	case AVIC_IPI_FAILURE_INVALID_TARGET:
523 		break;
524 	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
525 		WARN_ONCE(1, "Invalid backing page\n");
526 		break;
527 	default:
528 		pr_err("Unknown IPI interception\n");
529 	}
530 
531 	return 1;
532 }
533 
avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu * vcpu)534 unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
535 {
536 	if (is_guest_mode(vcpu))
537 		return APICV_INHIBIT_REASON_NESTED;
538 	return 0;
539 }
540 
/*
 * Map a logical destination register value to its slot in the VM's
 * logical APIC ID table.
 *
 * @ldr:  LDR value; only its logical ID field is used
 * @flat: true for the flat model, false for the cluster model
 *
 * Returns a pointer to the table entry, or NULL if the logical ID is zero
 * or cannot be mapped to an entry.
 */
static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
	int logical_id = GET_APIC_LOGICAL_ID(ldr);
	u32 *table;
	int slot;

	if (!logical_id)
		return NULL;

	if (!flat) {
		/* Cluster model: high nibble is the cluster, low nibble the mask. */
		int cluster_id = (logical_id & 0xf0) >> 4;
		int bit = ffs(logical_id & 0x0f) - 1;

		if (bit < 0 || bit > 7 || cluster_id >= 0xf)
			return NULL;

		slot = (cluster_id << 2) + bit;
	} else {
		/* Flat model: the slot is the lowest set bit of the mask. */
		slot = ffs(logical_id) - 1;
		if (slot > 7)
			return NULL;
	}

	table = (u32 *)page_address(kvm_svm->avic_logical_id_table_page);

	return table + slot;
}
569 
/*
 * Record @g_physical_id in the logical APIC ID table entry selected by
 * @ldr (interpreted according to the vCPU's current DFR) and mark the
 * entry valid.
 *
 * Returns 0 on success, -EINVAL if @ldr does not map to a table entry.
 */
static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
{
	bool flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
	u32 *slot = avic_get_logical_id_entry(vcpu, ldr, flat);
	u32 val;

	if (!slot)
		return -EINVAL;

	val = READ_ONCE(*slot);
	val &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
	val |= g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
	val |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
	WRITE_ONCE(*slot, val);

	return 0;
}
588 
avic_invalidate_logical_id_entry(struct kvm_vcpu * vcpu)589 static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
590 {
591 	struct vcpu_svm *svm = to_svm(vcpu);
592 	bool flat = svm->dfr_reg == APIC_DFR_FLAT;
593 	u32 *entry;
594 
595 	/* Note: x2AVIC does not use logical APIC ID table */
596 	if (apic_x2apic_mode(vcpu->arch.apic))
597 		return;
598 
599 	entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
600 	if (entry)
601 		clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
602 }
603 
avic_handle_ldr_update(struct kvm_vcpu * vcpu)604 static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
605 {
606 	int ret = 0;
607 	struct vcpu_svm *svm = to_svm(vcpu);
608 	u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
609 	u32 id = kvm_xapic_id(vcpu->arch.apic);
610 
611 	/* AVIC does not support LDR update for x2APIC */
612 	if (apic_x2apic_mode(vcpu->arch.apic))
613 		return 0;
614 
615 	if (ldr == svm->ldr_reg)
616 		return 0;
617 
618 	avic_invalidate_logical_id_entry(vcpu);
619 
620 	if (ldr)
621 		ret = avic_ldr_write(vcpu, id, ldr);
622 
623 	if (!ret)
624 		svm->ldr_reg = ldr;
625 
626 	return ret;
627 }
628 
avic_handle_dfr_update(struct kvm_vcpu * vcpu)629 static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
630 {
631 	struct vcpu_svm *svm = to_svm(vcpu);
632 	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
633 
634 	if (svm->dfr_reg == dfr)
635 		return;
636 
637 	avic_invalidate_logical_id_entry(vcpu);
638 	svm->dfr_reg = dfr;
639 }
640 
avic_unaccel_trap_write(struct kvm_vcpu * vcpu)641 static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu)
642 {
643 	u32 offset = to_svm(vcpu)->vmcb->control.exit_info_1 &
644 				AVIC_UNACCEL_ACCESS_OFFSET_MASK;
645 
646 	switch (offset) {
647 	case APIC_LDR:
648 		if (avic_handle_ldr_update(vcpu))
649 			return 0;
650 		break;
651 	case APIC_DFR:
652 		avic_handle_dfr_update(vcpu);
653 		break;
654 	default:
655 		break;
656 	}
657 
658 	kvm_apic_write_nodecode(vcpu, offset);
659 	return 1;
660 }
661 
is_avic_unaccelerated_access_trap(u32 offset)662 static bool is_avic_unaccelerated_access_trap(u32 offset)
663 {
664 	bool ret = false;
665 
666 	switch (offset) {
667 	case APIC_ID:
668 	case APIC_EOI:
669 	case APIC_RRR:
670 	case APIC_LDR:
671 	case APIC_DFR:
672 	case APIC_SPIV:
673 	case APIC_ESR:
674 	case APIC_ICR:
675 	case APIC_LVTT:
676 	case APIC_LVTTHMR:
677 	case APIC_LVTPC:
678 	case APIC_LVT0:
679 	case APIC_LVT1:
680 	case APIC_LVTERR:
681 	case APIC_TMICT:
682 	case APIC_TDCR:
683 		ret = true;
684 		break;
685 	default:
686 		break;
687 	}
688 	return ret;
689 }
690 
avic_unaccelerated_access_interception(struct kvm_vcpu * vcpu)691 int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu)
692 {
693 	struct vcpu_svm *svm = to_svm(vcpu);
694 	int ret = 0;
695 	u32 offset = svm->vmcb->control.exit_info_1 &
696 		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
697 	u32 vector = svm->vmcb->control.exit_info_2 &
698 		     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
699 	bool write = (svm->vmcb->control.exit_info_1 >> 32) &
700 		     AVIC_UNACCEL_ACCESS_WRITE_MASK;
701 	bool trap = is_avic_unaccelerated_access_trap(offset);
702 
703 	trace_kvm_avic_unaccelerated_access(vcpu->vcpu_id, offset,
704 					    trap, write, vector);
705 	if (trap) {
706 		/* Handling Trap */
707 		WARN_ONCE(!write, "svm: Handling trap read.\n");
708 		ret = avic_unaccel_trap_write(vcpu);
709 	} else {
710 		/* Handling Fault */
711 		ret = kvm_emulate_instruction(vcpu, 0);
712 	}
713 
714 	return ret;
715 }
716 
avic_init_vcpu(struct vcpu_svm * svm)717 int avic_init_vcpu(struct vcpu_svm *svm)
718 {
719 	int ret;
720 	struct kvm_vcpu *vcpu = &svm->vcpu;
721 
722 	if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
723 		return 0;
724 
725 	ret = avic_init_backing_page(vcpu);
726 	if (ret)
727 		return ret;
728 
729 	INIT_LIST_HEAD(&svm->ir_list);
730 	spin_lock_init(&svm->ir_list_lock);
731 	svm->dfr_reg = APIC_DFR_FLAT;
732 
733 	return ret;
734 }
735 
/*
 * Resynchronize the AVIC logical ID bookkeeping with the vCPU's APIC
 * registers.  The DFR is processed before the LDR because writing the LDR
 * entry reads the current DFR to choose between flat and cluster layout.
 * A failure of the LDR update is not reported to the caller.
 */
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
{
	avic_handle_dfr_update(vcpu);
	avic_handle_ldr_update(vcpu);
}
741 
avic_set_virtual_apic_mode(struct kvm_vcpu * vcpu)742 void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
743 {
744 	if (!lapic_in_kernel(vcpu) || avic_mode == AVIC_MODE_NONE)
745 		return;
746 
747 	if (kvm_get_apic_mode(vcpu) == LAPIC_MODE_INVALID) {
748 		WARN_ONCE(true, "Invalid local APIC state (vcpu_id=%d)", vcpu->vcpu_id);
749 		return;
750 	}
751 	avic_refresh_apicv_exec_ctrl(vcpu);
752 }
753 
/*
 * Switch every interrupt remapping entry on the vCPU's ir_list into
 * (@activate == true) or out of (@activate == false) IOMMU guest mode.
 *
 * Returns 0 on success or when the VM has no assigned device; otherwise
 * the first error reported by the IOMMU driver, at which point the walk
 * stops.
 */
static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct amd_svm_iommu_ir *pos;
	unsigned long irq_flags;
	int err = 0;

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Walk the per-vcpu ir_list and update all existing interrupt
	 * remapping table entries targeting this vcpu.  An empty list simply
	 * results in no iterations.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, irq_flags);
	list_for_each_entry(pos, &svm->ir_list, node) {
		err = activate ? amd_iommu_activate_guest_mode(pos->data)
			       : amd_iommu_deactivate_guest_mode(pos->data);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&svm->ir_list_lock, irq_flags);

	return err;
}
785 
svm_ir_list_del(struct vcpu_svm * svm,struct amd_iommu_pi_data * pi)786 static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
787 {
788 	unsigned long flags;
789 	struct amd_svm_iommu_ir *cur;
790 
791 	spin_lock_irqsave(&svm->ir_list_lock, flags);
792 	list_for_each_entry(cur, &svm->ir_list, node) {
793 		if (cur->data != pi->ir_data)
794 			continue;
795 		list_del(&cur->node);
796 		kfree(cur);
797 		break;
798 	}
799 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
800 }
801 
svm_ir_list_add(struct vcpu_svm * svm,struct amd_iommu_pi_data * pi)802 static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
803 {
804 	int ret = 0;
805 	unsigned long flags;
806 	struct amd_svm_iommu_ir *ir;
807 
808 	/**
809 	 * In some cases, the existing irte is updated and re-set,
810 	 * so we need to check here if it's already been * added
811 	 * to the ir_list.
812 	 */
813 	if (pi->ir_data && (pi->prev_ga_tag != 0)) {
814 		struct kvm *kvm = svm->vcpu.kvm;
815 		u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
816 		struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
817 		struct vcpu_svm *prev_svm;
818 
819 		if (!prev_vcpu) {
820 			ret = -EINVAL;
821 			goto out;
822 		}
823 
824 		prev_svm = to_svm(prev_vcpu);
825 		svm_ir_list_del(prev_svm, pi);
826 	}
827 
828 	/**
829 	 * Allocating new amd_iommu_pi_data, which will get
830 	 * add to the per-vcpu ir_list.
831 	 */
832 	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
833 	if (!ir) {
834 		ret = -ENOMEM;
835 		goto out;
836 	}
837 	ir->data = pi->ir_data;
838 
839 	spin_lock_irqsave(&svm->ir_list_lock, flags);
840 	list_add(&ir->node, &svm->ir_list);
841 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
842 out:
843 	return ret;
844 }
845 
846 /*
847  * Note:
848  * The HW cannot support posting multicast/broadcast
849  * interrupts to a vCPU. So, we still use legacy interrupt
850  * remapping for these kind of interrupts.
851  *
852  * For lowest-priority interrupts, we only support
853  * those with single CPU as the destination, e.g. user
854  * configures the interrupts via /proc/irq or uses
855  * irqbalance to make the interrupts single-CPU.
856  */
857 static int
get_pi_vcpu_info(struct kvm * kvm,struct kvm_kernel_irq_routing_entry * e,struct vcpu_data * vcpu_info,struct vcpu_svm ** svm)858 get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
859 		 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
860 {
861 	struct kvm_lapic_irq irq;
862 	struct kvm_vcpu *vcpu = NULL;
863 
864 	kvm_set_msi_irq(kvm, e, &irq);
865 
866 	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
867 	    !kvm_irq_is_postable(&irq)) {
868 		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
869 			 __func__, irq.vector);
870 		return -1;
871 	}
872 
873 	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
874 		 irq.vector);
875 	*svm = to_svm(vcpu);
876 	vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
877 	vcpu_info->vector = irq.vector;
878 
879 	return 0;
880 }
881 
882 /*
883  * avic_pi_update_irte - set IRTE for Posted-Interrupts
884  *
885  * @kvm: kvm
886  * @host_irq: host irq of the interrupt
887  * @guest_irq: gsi of the interrupt
888  * @set: set or unset PI
889  * returns 0 on success, < 0 on failure
890  */
avic_pi_update_irte(struct kvm * kvm,unsigned int host_irq,uint32_t guest_irq,bool set)891 int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
892 			uint32_t guest_irq, bool set)
893 {
894 	struct kvm_kernel_irq_routing_entry *e;
895 	struct kvm_irq_routing_table *irq_rt;
896 	int idx, ret = 0;
897 
898 	if (!kvm_arch_has_assigned_device(kvm) ||
899 	    !irq_remapping_cap(IRQ_POSTING_CAP))
900 		return 0;
901 
902 	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
903 		 __func__, host_irq, guest_irq, set);
904 
905 	idx = srcu_read_lock(&kvm->irq_srcu);
906 	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
907 
908 	if (guest_irq >= irq_rt->nr_rt_entries ||
909 		hlist_empty(&irq_rt->map[guest_irq])) {
910 		pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
911 			     guest_irq, irq_rt->nr_rt_entries);
912 		goto out;
913 	}
914 
915 	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
916 		struct vcpu_data vcpu_info;
917 		struct vcpu_svm *svm = NULL;
918 
919 		if (e->type != KVM_IRQ_ROUTING_MSI)
920 			continue;
921 
922 		/**
923 		 * Here, we setup with legacy mode in the following cases:
924 		 * 1. When cannot target interrupt to a specific vcpu.
925 		 * 2. Unsetting posted interrupt.
926 		 * 3. APIC virtualization is disabled for the vcpu.
927 		 * 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
928 		 */
929 		if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
930 		    kvm_vcpu_apicv_active(&svm->vcpu)) {
931 			struct amd_iommu_pi_data pi;
932 
933 			/* Try to enable guest_mode in IRTE */
934 			pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
935 					    AVIC_HPA_MASK);
936 			pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
937 						     svm->vcpu.vcpu_id);
938 			pi.is_guest_mode = true;
939 			pi.vcpu_data = &vcpu_info;
940 			ret = irq_set_vcpu_affinity(host_irq, &pi);
941 
942 			/**
943 			 * Here, we successfully setting up vcpu affinity in
944 			 * IOMMU guest mode. Now, we need to store the posted
945 			 * interrupt information in a per-vcpu ir_list so that
946 			 * we can reference to them directly when we update vcpu
947 			 * scheduling information in IOMMU irte.
948 			 */
949 			if (!ret && pi.is_guest_mode)
950 				svm_ir_list_add(svm, &pi);
951 		} else {
952 			/* Use legacy mode in IRTE */
953 			struct amd_iommu_pi_data pi;
954 
955 			/**
956 			 * Here, pi is used to:
957 			 * - Tell IOMMU to use legacy mode for this interrupt.
958 			 * - Retrieve ga_tag of prior interrupt remapping data.
959 			 */
960 			pi.prev_ga_tag = 0;
961 			pi.is_guest_mode = false;
962 			ret = irq_set_vcpu_affinity(host_irq, &pi);
963 
964 			/**
965 			 * Check if the posted interrupt was previously
966 			 * setup with the guest_mode by checking if the ga_tag
967 			 * was cached. If so, we need to clean up the per-vcpu
968 			 * ir_list.
969 			 */
970 			if (!ret && pi.prev_ga_tag) {
971 				int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
972 				struct kvm_vcpu *vcpu;
973 
974 				vcpu = kvm_get_vcpu_by_id(kvm, id);
975 				if (vcpu)
976 					svm_ir_list_del(to_svm(vcpu), &pi);
977 			}
978 		}
979 
980 		if (!ret && svm) {
981 			trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
982 						 e->gsi, vcpu_info.vector,
983 						 vcpu_info.pi_desc_addr, set);
984 		}
985 
986 		if (ret < 0) {
987 			pr_err("%s: failed to update PI IRTE\n", __func__);
988 			goto out;
989 		}
990 	}
991 
992 	ret = 0;
993 out:
994 	srcu_read_unlock(&kvm->irq_srcu, idx);
995 	return ret;
996 }
997 
avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)998 bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
999 {
1000 	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
1001 			  BIT(APICV_INHIBIT_REASON_ABSENT) |
1002 			  BIT(APICV_INHIBIT_REASON_HYPERV) |
1003 			  BIT(APICV_INHIBIT_REASON_NESTED) |
1004 			  BIT(APICV_INHIBIT_REASON_IRQWIN) |
1005 			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
1006 			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
1007 			  BIT(APICV_INHIBIT_REASON_SEV)      |
1008 			  BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
1009 			  BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
1010 
1011 	return supported & BIT(reason);
1012 }
1013 
1014 
1015 static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu * vcpu,int cpu,bool r)1016 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
1017 {
1018 	int ret = 0;
1019 	unsigned long flags;
1020 	struct amd_svm_iommu_ir *ir;
1021 	struct vcpu_svm *svm = to_svm(vcpu);
1022 
1023 	if (!kvm_arch_has_assigned_device(vcpu->kvm))
1024 		return 0;
1025 
1026 	/*
1027 	 * Here, we go through the per-vcpu ir_list to update all existing
1028 	 * interrupt remapping table entry targeting this vcpu.
1029 	 */
1030 	spin_lock_irqsave(&svm->ir_list_lock, flags);
1031 
1032 	if (list_empty(&svm->ir_list))
1033 		goto out;
1034 
1035 	list_for_each_entry(ir, &svm->ir_list, node) {
1036 		ret = amd_iommu_update_ga(cpu, r, ir->data);
1037 		if (ret)
1038 			break;
1039 	}
1040 out:
1041 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
1042 	return ret;
1043 }
1044 
avic_vcpu_load(struct kvm_vcpu * vcpu,int cpu)1045 void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1046 {
1047 	u64 entry;
1048 	int h_physical_id = kvm_cpu_get_apicid(cpu);
1049 	struct vcpu_svm *svm = to_svm(vcpu);
1050 
1051 	lockdep_assert_preemption_disabled();
1052 
1053 	if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
1054 		return;
1055 
1056 	/*
1057 	 * No need to update anything if the vCPU is blocking, i.e. if the vCPU
1058 	 * is being scheduled in after being preempted.  The CPU entries in the
1059 	 * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'.
1060 	 * If the vCPU was migrated, its new CPU value will be stuffed when the
1061 	 * vCPU unblocks.
1062 	 */
1063 	if (kvm_vcpu_is_blocking(vcpu))
1064 		return;
1065 
1066 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
1067 
1068 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
1069 	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
1070 	entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1071 
1072 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
1073 	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
1074 }
1075 
avic_vcpu_put(struct kvm_vcpu * vcpu)1076 void avic_vcpu_put(struct kvm_vcpu *vcpu)
1077 {
1078 	u64 entry;
1079 	struct vcpu_svm *svm = to_svm(vcpu);
1080 
1081 	lockdep_assert_preemption_disabled();
1082 
1083 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
1084 
1085 	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
1086 	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
1087 		return;
1088 
1089 	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
1090 
1091 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1092 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
1093 }
1094 
1095 
avic_refresh_apicv_exec_ctrl(struct kvm_vcpu * vcpu)1096 void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
1097 {
1098 	struct vcpu_svm *svm = to_svm(vcpu);
1099 	struct vmcb *vmcb = svm->vmcb01.ptr;
1100 	bool activated = kvm_vcpu_apicv_active(vcpu);
1101 
1102 	if (!enable_apicv)
1103 		return;
1104 
1105 	if (activated) {
1106 		/**
1107 		 * During AVIC temporary deactivation, guest could update
1108 		 * APIC ID, DFR and LDR registers, which would not be trapped
1109 		 * by avic_unaccelerated_access_interception(). In this case,
1110 		 * we need to check and update the AVIC logical APIC ID table
1111 		 * accordingly before re-activating.
1112 		 */
1113 		avic_apicv_post_state_restore(vcpu);
1114 		avic_activate_vmcb(svm);
1115 	} else {
1116 		avic_deactivate_vmcb(svm);
1117 	}
1118 	vmcb_mark_dirty(vmcb, VMCB_AVIC);
1119 
1120 	if (activated)
1121 		avic_vcpu_load(vcpu, vcpu->cpu);
1122 	else
1123 		avic_vcpu_put(vcpu);
1124 
1125 	avic_set_pi_irte_mode(vcpu, activated);
1126 }
1127 
/*
 * Hook invoked when @vcpu is about to block: unload the AVIC via
 * avic_vcpu_put() if APICv is active for the vCPU, otherwise do nothing.
 */
void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	/*
	 * Unload the AVIC when the vCPU is about to block, _before_ the vCPU
	 * actually blocks.
	 *
	 * Any IRQs that arrive before IsRunning=0 will not cause an
	 * incomplete IPI vmexit on the source, therefore vIRR will also be
	 * checked by kvm_vcpu_check_block() before blocking.  The memory
	 * barrier implicit in set_current_state orders writing IsRunning=0
	 * before reading the vIRR.  The processor needs a matching memory
	 * barrier on interrupt delivery between writing IRR and reading
	 * IsRunning; the lack of this barrier might be the cause of
	 * errata #1235.
	 */
	if (kvm_vcpu_apicv_active(vcpu))
		avic_vcpu_put(vcpu);
}
1148 
avic_vcpu_unblocking(struct kvm_vcpu * vcpu)1149 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
1150 {
1151 	if (!kvm_vcpu_apicv_active(vcpu))
1152 		return;
1153 
1154 	avic_vcpu_load(vcpu, vcpu->cpu);
1155 }
1156 
1157 /*
1158  * Note:
1159  * - The module param avic enable both xAPIC and x2APIC mode.
1160  * - Hypervisor can support both xAVIC and x2AVIC in the same guest.
1161  * - The mode can be switched at run-time.
1162  */
avic_hardware_setup(struct kvm_x86_ops * x86_ops)1163 bool avic_hardware_setup(struct kvm_x86_ops *x86_ops)
1164 {
1165 	if (!npt_enabled)
1166 		return false;
1167 
1168 	if (boot_cpu_has(X86_FEATURE_AVIC)) {
1169 		avic_mode = AVIC_MODE_X1;
1170 		pr_info("AVIC enabled\n");
1171 	} else if (force_avic) {
1172 		/*
1173 		 * Some older systems does not advertise AVIC support.
1174 		 * See Revision Guide for specific AMD processor for more detail.
1175 		 */
1176 		avic_mode = AVIC_MODE_X1;
1177 		pr_warn("AVIC is not supported in CPUID but force enabled");
1178 		pr_warn("Your system might crash and burn");
1179 	}
1180 
1181 	/* AVIC is a prerequisite for x2AVIC. */
1182 	if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
1183 		if (avic_mode == AVIC_MODE_X1) {
1184 			avic_mode = AVIC_MODE_X2;
1185 			pr_info("x2AVIC enabled\n");
1186 		} else {
1187 			pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");
1188 			pr_warn(FW_BUG "Try enable AVIC using force_avic option");
1189 		}
1190 	}
1191 
1192 	if (avic_mode != AVIC_MODE_NONE)
1193 		amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
1194 
1195 	return !!avic_mode;
1196 }
1197