1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2020
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Christian Ehrhardt <ehrhardt@de.ibm.com>
10 * Jason J. Herne <jjherne@us.ibm.com>
11 */
12
13 #define KMSG_COMPONENT "kvm-s390"
14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15
16 #include <linux/compiler.h>
17 #include <linux/err.h>
18 #include <linux/fs.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/mman.h>
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/pgtable.h>
34 #include <linux/mmu_notifier.h>
35
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 #include "pci.h"
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
63 KVM_GENERIC_VM_STATS(),
64 STATS_DESC_COUNTER(VM, inject_io),
65 STATS_DESC_COUNTER(VM, inject_float_mchk),
66 STATS_DESC_COUNTER(VM, inject_pfault_done),
67 STATS_DESC_COUNTER(VM, inject_service_signal),
68 STATS_DESC_COUNTER(VM, inject_virtio),
69 STATS_DESC_COUNTER(VM, aen_forward)
70 };
71
72 const struct kvm_stats_header kvm_vm_stats_header = {
73 .name_size = KVM_STATS_NAME_SIZE,
74 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
75 .id_offset = sizeof(struct kvm_stats_header),
76 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
77 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
78 sizeof(kvm_vm_stats_desc),
79 };
80
81 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
82 KVM_GENERIC_VCPU_STATS(),
83 STATS_DESC_COUNTER(VCPU, exit_userspace),
84 STATS_DESC_COUNTER(VCPU, exit_null),
85 STATS_DESC_COUNTER(VCPU, exit_external_request),
86 STATS_DESC_COUNTER(VCPU, exit_io_request),
87 STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
88 STATS_DESC_COUNTER(VCPU, exit_stop_request),
89 STATS_DESC_COUNTER(VCPU, exit_validity),
90 STATS_DESC_COUNTER(VCPU, exit_instruction),
91 STATS_DESC_COUNTER(VCPU, exit_pei),
92 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
93 STATS_DESC_COUNTER(VCPU, instruction_lctl),
94 STATS_DESC_COUNTER(VCPU, instruction_lctlg),
95 STATS_DESC_COUNTER(VCPU, instruction_stctl),
96 STATS_DESC_COUNTER(VCPU, instruction_stctg),
97 STATS_DESC_COUNTER(VCPU, exit_program_interruption),
98 STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
99 STATS_DESC_COUNTER(VCPU, exit_operation_exception),
100 STATS_DESC_COUNTER(VCPU, deliver_ckc),
101 STATS_DESC_COUNTER(VCPU, deliver_cputm),
102 STATS_DESC_COUNTER(VCPU, deliver_external_call),
103 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
104 STATS_DESC_COUNTER(VCPU, deliver_service_signal),
105 STATS_DESC_COUNTER(VCPU, deliver_virtio),
106 STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
107 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
108 STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
109 STATS_DESC_COUNTER(VCPU, deliver_program),
110 STATS_DESC_COUNTER(VCPU, deliver_io),
111 STATS_DESC_COUNTER(VCPU, deliver_machine_check),
112 STATS_DESC_COUNTER(VCPU, exit_wait_state),
113 STATS_DESC_COUNTER(VCPU, inject_ckc),
114 STATS_DESC_COUNTER(VCPU, inject_cputm),
115 STATS_DESC_COUNTER(VCPU, inject_external_call),
116 STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
117 STATS_DESC_COUNTER(VCPU, inject_mchk),
118 STATS_DESC_COUNTER(VCPU, inject_pfault_init),
119 STATS_DESC_COUNTER(VCPU, inject_program),
120 STATS_DESC_COUNTER(VCPU, inject_restart),
121 STATS_DESC_COUNTER(VCPU, inject_set_prefix),
122 STATS_DESC_COUNTER(VCPU, inject_stop_signal),
123 STATS_DESC_COUNTER(VCPU, instruction_epsw),
124 STATS_DESC_COUNTER(VCPU, instruction_gs),
125 STATS_DESC_COUNTER(VCPU, instruction_io_other),
126 STATS_DESC_COUNTER(VCPU, instruction_lpsw),
127 STATS_DESC_COUNTER(VCPU, instruction_lpswe),
128 STATS_DESC_COUNTER(VCPU, instruction_pfmf),
129 STATS_DESC_COUNTER(VCPU, instruction_ptff),
130 STATS_DESC_COUNTER(VCPU, instruction_sck),
131 STATS_DESC_COUNTER(VCPU, instruction_sckpf),
132 STATS_DESC_COUNTER(VCPU, instruction_stidp),
133 STATS_DESC_COUNTER(VCPU, instruction_spx),
134 STATS_DESC_COUNTER(VCPU, instruction_stpx),
135 STATS_DESC_COUNTER(VCPU, instruction_stap),
136 STATS_DESC_COUNTER(VCPU, instruction_iske),
137 STATS_DESC_COUNTER(VCPU, instruction_ri),
138 STATS_DESC_COUNTER(VCPU, instruction_rrbe),
139 STATS_DESC_COUNTER(VCPU, instruction_sske),
140 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
141 STATS_DESC_COUNTER(VCPU, instruction_stsi),
142 STATS_DESC_COUNTER(VCPU, instruction_stfl),
143 STATS_DESC_COUNTER(VCPU, instruction_tb),
144 STATS_DESC_COUNTER(VCPU, instruction_tpi),
145 STATS_DESC_COUNTER(VCPU, instruction_tprot),
146 STATS_DESC_COUNTER(VCPU, instruction_tsch),
147 STATS_DESC_COUNTER(VCPU, instruction_sie),
148 STATS_DESC_COUNTER(VCPU, instruction_essa),
149 STATS_DESC_COUNTER(VCPU, instruction_sthyi),
150 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
151 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
152 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
153 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
154 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
155 STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
156 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
157 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
158 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
159 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
160 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
161 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
162 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
163 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
164 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
165 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
166 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
167 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
168 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
169 STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
170 STATS_DESC_COUNTER(VCPU, diag_9c_forward),
171 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
172 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
173 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
174 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
175 STATS_DESC_COUNTER(VCPU, pfault_sync)
176 };
177
178 const struct kvm_stats_header kvm_vcpu_stats_header = {
179 .name_size = KVM_STATS_NAME_SIZE,
180 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
181 .id_offset = sizeof(struct kvm_stats_header),
182 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
183 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
184 sizeof(kvm_vcpu_stats_desc),
185 };
186
187 /* allow nested virtualization in KVM (if enabled by user space) */
188 static int nested;
189 module_param(nested, int, S_IRUGO);
190 MODULE_PARM_DESC(nested, "Nested virtualization support");
191
192 /* allow 1m huge page guest backing, if !nested */
193 static int hpage;
194 module_param(hpage, int, 0444);
195 MODULE_PARM_DESC(hpage, "1m huge page backing support");
196
197 /* maximum percentage of steal time for polling. >100 is treated like 100 */
198 static u8 halt_poll_max_steal = 10;
199 module_param(halt_poll_max_steal, byte, 0644);
200 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
201
202 /* if set to true, the GISA will be initialized and used if available */
203 static bool use_gisa = true;
204 module_param(use_gisa, bool, 0644);
205 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
206
207 /* maximum diag9c forwarding per second */
208 unsigned int diag9c_forwarding_hz;
209 module_param(diag9c_forwarding_hz, uint, 0644);
210 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
211
212 /*
213 * For now we handle at most 16 double words as this is what the s390 base
214 * kernel handles and stores in the prefix page. If we ever need to go beyond
215 * this, changes to the code will be required, but the external uapi can stay.
216 */
217 #define SIZE_INTERNAL 16
218
219 /*
220 * Base feature mask that defines the default mask for facilities. Consists of the
221 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
222 */
223 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
224 /*
225 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
226 * and defines the facilities that can be enabled via a cpu model.
227 */
228 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
229
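/*
 * Number of facility doublewords actually handled by KVM; the BUILD_BUG_ONs
 * below ensure it never exceeds the architectural mask/list sizes or the
 * size of the host's stfle facility list.
 */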
230 static unsigned long kvm_s390_fac_size(void)
231 {
232 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
233 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
234 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
235 sizeof(stfle_fac_list));
236
237 return SIZE_INTERNAL;
238 }
239
240 /* available cpu features supported by kvm */
241 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
242 /* available subfunctions indicated via query / "test bit" */
243 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
244
245 static struct gmap_notifier gmap_notifier;
246 static struct gmap_notifier vsie_gmap_notifier;
247 debug_info_t *kvm_s390_dbf;
248 debug_info_t *kvm_s390_dbf_uv;
249
250 /* Section: not file related */
251 int kvm_arch_hardware_enable(void)
252 {
253 /* every s390 is virtualization enabled ;-) */
254 return 0;
255 }
256
257 int kvm_arch_check_processor_compat(void *opaque)
258 {
259 return 0;
260 }
261
262 /* forward declarations */
263 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
264 unsigned long end);
265 static int sca_switch_to_extended(struct kvm *kvm);
266
267 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
268 {
269 u8 delta_idx = 0;
270
271 /*
272 * The TOD jumps by delta; we have to compensate for this by adding
273 * -delta to the epoch.
274 */
275 delta = -delta;
276
277 /* sign-extension - we're adding to signed values below */
278 if ((s64)delta < 0)
279 delta_idx = -1;
280
281 scb->epoch += delta;
282 if (scb->ecd & ECD_MEF) {
283 scb->epdx += delta_idx;
284 if (scb->epoch < delta)
285 scb->epdx += 1;
286 }
287 }
288
289 /*
290 * This callback is executed during stop_machine(). All CPUs are therefore
291 * temporarily stopped. In order not to change guest behavior, we have to
292 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
293 * so a CPU won't be stopped while calculating with the epoch.
294 */
295 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
296 void *v)
297 {
298 struct kvm *kvm;
299 struct kvm_vcpu *vcpu;
300 unsigned long i;
301 unsigned long long *delta = v;
302
303 list_for_each_entry(kvm, &vm_list, vm_list) {
304 kvm_for_each_vcpu(i, vcpu, kvm) {
305 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
306 if (i == 0) {
307 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
308 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
309 }
310 if (vcpu->arch.cputm_enabled)
311 vcpu->arch.cputm_start += *delta;
312 if (vcpu->arch.vsie_block)
313 kvm_clock_sync_scb(vcpu->arch.vsie_block,
314 *delta);
315 }
316 }
317 return NOTIFY_OK;
318 }
319
320 static struct notifier_block kvm_clock_notifier = {
321 .notifier_call = kvm_clock_sync,
322 };
323
324 int kvm_arch_hardware_setup(void *opaque)
325 {
326 gmap_notifier.notifier_call = kvm_gmap_notifier;
327 gmap_register_pte_notifier(&gmap_notifier);
328 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
329 gmap_register_pte_notifier(&vsie_gmap_notifier);
330 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
331 &kvm_clock_notifier);
332 return 0;
333 }
334
335 void kvm_arch_hardware_unsetup(void)
336 {
337 gmap_unregister_pte_notifier(&gmap_notifier);
338 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
339 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
340 &kvm_clock_notifier);
341 }
342
343 static void allow_cpu_feat(unsigned long nr)
344 {
345 set_bit_inv(nr, kvm_s390_available_cpu_feat);
346 }
347
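/*
 * Check whether a given PERFORM LOCKED OPERATION function code is
 * available: bit 0x100 selects the "test bit" form, and condition
 * code 0 means the function is installed.
 */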
348 static inline int plo_test_bit(unsigned char nr)
349 {
350 unsigned long function = (unsigned long)nr | 0x100;
351 int cc;
352
353 asm volatile(
354 " lgr 0,%[function]\n"
355 /* Parameter registers are ignored for "test bit" */
356 " plo 0,0,0,0(0)\n"
357 " ipm %0\n"
358 " srl %0,28\n"
359 : "=d" (cc)
360 : [function] "d" (function)
361 : "cc", "0");
362 return cc == 0;
363 }
364
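/*
 * Execute the query function (function code 0 in GR0) of an RRF-format
 * instruction and store the returned parameter block at *query.
 */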
365 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
366 {
367 asm volatile(
368 " lghi 0,0\n"
369 " lgr 1,%[query]\n"
370 /* Parameter registers are ignored */
371 " .insn rrf,%[opc] << 16,2,4,6,0\n"
372 :
373 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
374 : "cc", "memory", "0", "1");
375 }
376
377 #define INSN_SORTL 0xb938
378 #define INSN_DFLTCC 0xb939
379
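/*
 * Probe the host for the subfunctions (PLO, PTFF, CPACF, SORTL, DFLTCC)
 * and SIE features that can be offered to guests. The SIE features needed
 * for vSIE are only enabled if the "nested" module parameter is set and
 * the required host facilities are present.
 */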
380 static void kvm_s390_cpu_feat_init(void)
381 {
382 int i;
383
384 for (i = 0; i < 256; ++i) {
385 if (plo_test_bit(i))
386 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
387 }
388
389 if (test_facility(28)) /* TOD-clock steering */
390 ptff(kvm_s390_available_subfunc.ptff,
391 sizeof(kvm_s390_available_subfunc.ptff),
392 PTFF_QAF);
393
394 if (test_facility(17)) { /* MSA */
395 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
396 kvm_s390_available_subfunc.kmac);
397 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
398 kvm_s390_available_subfunc.kmc);
399 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
400 kvm_s390_available_subfunc.km);
401 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
402 kvm_s390_available_subfunc.kimd);
403 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
404 kvm_s390_available_subfunc.klmd);
405 }
406 if (test_facility(76)) /* MSA3 */
407 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
408 kvm_s390_available_subfunc.pckmo);
409 if (test_facility(77)) { /* MSA4 */
410 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
411 kvm_s390_available_subfunc.kmctr);
412 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
413 kvm_s390_available_subfunc.kmf);
414 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
415 kvm_s390_available_subfunc.kmo);
416 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
417 kvm_s390_available_subfunc.pcc);
418 }
419 if (test_facility(57)) /* MSA5 */
420 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
421 kvm_s390_available_subfunc.ppno);
422
423 if (test_facility(146)) /* MSA8 */
424 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
425 kvm_s390_available_subfunc.kma);
426
427 if (test_facility(155)) /* MSA9 */
428 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
429 kvm_s390_available_subfunc.kdsa);
430
431 if (test_facility(150)) /* SORTL */
432 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
433
434 if (test_facility(151)) /* DFLTCC */
435 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
436
437 if (MACHINE_HAS_ESOP)
438 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
439 /*
440 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
441 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
442 */
443 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
444 !test_facility(3) || !nested)
445 return;
446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
447 if (sclp.has_64bscao)
448 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
449 if (sclp.has_siif)
450 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
451 if (sclp.has_gpere)
452 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
453 if (sclp.has_gsls)
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
455 if (sclp.has_ib)
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
457 if (sclp.has_cei)
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
459 if (sclp.has_ibs)
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
461 if (sclp.has_kss)
462 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
463 /*
464 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
465 * all skey handling functions read/set the skey from the PGSTE
466 * instead of the real storage key.
467 *
468 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
469 * pages being detected as preserved although they are resident.
470 *
471 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
472 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
473 *
474 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
475 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
476 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
477 *
478 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
479 * cannot easily shadow the SCA because of the ipte lock.
480 */
481 }
482
483 int kvm_arch_init(void *opaque)
484 {
485 int rc = -ENOMEM;
486
487 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
488 if (!kvm_s390_dbf)
489 return -ENOMEM;
490
491 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
492 if (!kvm_s390_dbf_uv)
493 goto out;
494
495 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
496 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
497 goto out;
498
499 kvm_s390_cpu_feat_init();
500
501 /* Register floating interrupt controller interface. */
502 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
503 if (rc) {
504 pr_err("A FLIC registration call failed with rc=%d\n", rc);
505 goto out;
506 }
507
508 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
509 rc = kvm_s390_pci_init();
510 if (rc) {
511 pr_err("Unable to allocate AIFT for PCI\n");
512 goto out;
513 }
514 }
515
516 rc = kvm_s390_gib_init(GAL_ISC);
517 if (rc)
518 goto out;
519
520 return 0;
521
522 out:
523 kvm_arch_exit();
524 return rc;
525 }
526
527 void kvm_arch_exit(void)
528 {
529 kvm_s390_gib_destroy();
530 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
531 kvm_s390_pci_exit();
532 debug_unregister(kvm_s390_dbf);
533 debug_unregister(kvm_s390_dbf_uv);
534 }
535
536 /* Section: device related */
537 long kvm_arch_dev_ioctl(struct file *filp,
538 unsigned int ioctl, unsigned long arg)
539 {
540 if (ioctl == KVM_S390_ENABLE_SIE)
541 return s390_enable_sie();
542 return -EINVAL;
543 }
544
545 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
546 {
547 int r;
548
549 switch (ext) {
550 case KVM_CAP_S390_PSW:
551 case KVM_CAP_S390_GMAP:
552 case KVM_CAP_SYNC_MMU:
553 #ifdef CONFIG_KVM_S390_UCONTROL
554 case KVM_CAP_S390_UCONTROL:
555 #endif
556 case KVM_CAP_ASYNC_PF:
557 case KVM_CAP_SYNC_REGS:
558 case KVM_CAP_ONE_REG:
559 case KVM_CAP_ENABLE_CAP:
560 case KVM_CAP_S390_CSS_SUPPORT:
561 case KVM_CAP_IOEVENTFD:
562 case KVM_CAP_DEVICE_CTRL:
563 case KVM_CAP_S390_IRQCHIP:
564 case KVM_CAP_VM_ATTRIBUTES:
565 case KVM_CAP_MP_STATE:
566 case KVM_CAP_IMMEDIATE_EXIT:
567 case KVM_CAP_S390_INJECT_IRQ:
568 case KVM_CAP_S390_USER_SIGP:
569 case KVM_CAP_S390_USER_STSI:
570 case KVM_CAP_S390_SKEYS:
571 case KVM_CAP_S390_IRQ_STATE:
572 case KVM_CAP_S390_USER_INSTR0:
573 case KVM_CAP_S390_CMMA_MIGRATION:
574 case KVM_CAP_S390_AIS:
575 case KVM_CAP_S390_AIS_MIGRATION:
576 case KVM_CAP_S390_VCPU_RESETS:
577 case KVM_CAP_SET_GUEST_DEBUG:
578 case KVM_CAP_S390_DIAG318:
579 case KVM_CAP_S390_MEM_OP_EXTENSION:
580 r = 1;
581 break;
582 case KVM_CAP_SET_GUEST_DEBUG2:
583 r = KVM_GUESTDBG_VALID_MASK;
584 break;
585 case KVM_CAP_S390_HPAGE_1M:
586 r = 0;
587 if (hpage && !kvm_is_ucontrol(kvm))
588 r = 1;
589 break;
590 case KVM_CAP_S390_MEM_OP:
591 r = MEM_OP_MAX_SIZE;
592 break;
593 case KVM_CAP_NR_VCPUS:
594 case KVM_CAP_MAX_VCPUS:
595 case KVM_CAP_MAX_VCPU_ID:
596 r = KVM_S390_BSCA_CPU_SLOTS;
597 if (!kvm_s390_use_sca_entries())
598 r = KVM_MAX_VCPUS;
599 else if (sclp.has_esca && sclp.has_64bscao)
600 r = KVM_S390_ESCA_CPU_SLOTS;
601 if (ext == KVM_CAP_NR_VCPUS)
602 r = min_t(unsigned int, num_online_cpus(), r);
603 break;
604 case KVM_CAP_S390_COW:
605 r = MACHINE_HAS_ESOP;
606 break;
607 case KVM_CAP_S390_VECTOR_REGISTERS:
608 r = MACHINE_HAS_VX;
609 break;
610 case KVM_CAP_S390_RI:
611 r = test_facility(64);
612 break;
613 case KVM_CAP_S390_GS:
614 r = test_facility(133);
615 break;
616 case KVM_CAP_S390_BPB:
617 r = test_facility(82);
618 break;
619 case KVM_CAP_S390_PROTECTED:
620 r = is_prot_virt_host();
621 break;
622 case KVM_CAP_S390_PROTECTED_DUMP: {
623 u64 pv_cmds_dump[] = {
624 BIT_UVC_CMD_DUMP_INIT,
625 BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
626 BIT_UVC_CMD_DUMP_CPU,
627 BIT_UVC_CMD_DUMP_COMPLETE,
628 };
629 int i;
630
631 r = is_prot_virt_host();
632
633 for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
634 if (!test_bit_inv(pv_cmds_dump[i],
635 (unsigned long *)&uv_info.inst_calls_list)) {
636 r = 0;
637 break;
638 }
639 }
640 break;
641 }
642 case KVM_CAP_S390_ZPCI_OP:
643 r = kvm_s390_pci_interp_allowed();
644 break;
645 case KVM_CAP_S390_CPU_TOPOLOGY:
646 r = test_facility(11);
647 break;
648 default:
649 r = 0;
650 }
651 return r;
652 }
653
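/*
 * Transfer the dirty information collected in the gmap to the KVM dirty
 * bitmap, walking the memslot one segment (_PAGE_ENTRIES pages) at a time.
 */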
654 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
655 {
656 int i;
657 gfn_t cur_gfn, last_gfn;
658 unsigned long gaddr, vmaddr;
659 struct gmap *gmap = kvm->arch.gmap;
660 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
661
662 /* Loop over all guest segments */
663 cur_gfn = memslot->base_gfn;
664 last_gfn = memslot->base_gfn + memslot->npages;
665 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
666 gaddr = gfn_to_gpa(cur_gfn);
667 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
668 if (kvm_is_error_hva(vmaddr))
669 continue;
670
671 bitmap_zero(bitmap, _PAGE_ENTRIES);
672 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
673 for (i = 0; i < _PAGE_ENTRIES; i++) {
674 if (test_bit(i, bitmap))
675 mark_page_dirty(kvm, cur_gfn + i);
676 }
677
678 if (fatal_signal_pending(current))
679 return;
680 cond_resched();
681 }
682 }
683
684 /* Section: vm related */
685 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
686
687 /*
688 * Get (and clear) the dirty memory log for a memory slot.
689 */
690 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
691 struct kvm_dirty_log *log)
692 {
693 int r;
694 unsigned long n;
695 struct kvm_memory_slot *memslot;
696 int is_dirty;
697
698 if (kvm_is_ucontrol(kvm))
699 return -EINVAL;
700
701 mutex_lock(&kvm->slots_lock);
702
703 r = -EINVAL;
704 if (log->slot >= KVM_USER_MEM_SLOTS)
705 goto out;
706
707 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
708 if (r)
709 goto out;
710
711 /* Clear the dirty log */
712 if (is_dirty) {
713 n = kvm_dirty_bitmap_bytes(memslot);
714 memset(memslot->dirty_bitmap, 0, n);
715 }
716 r = 0;
717 out:
718 mutex_unlock(&kvm->slots_lock);
719 return r;
720 }
721
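/*
 * Request that all vcpus intercept the operation exception (used for the
 * user instruction 0 handling enabled via KVM_CAP_S390_USER_INSTR0).
 */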
722 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
723 {
724 unsigned long i;
725 struct kvm_vcpu *vcpu;
726
727 kvm_for_each_vcpu(i, vcpu, kvm) {
728 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
729 }
730 }
731
732 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
733 {
734 int r;
735
736 if (cap->flags)
737 return -EINVAL;
738
739 switch (cap->cap) {
740 case KVM_CAP_S390_IRQCHIP:
741 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
742 kvm->arch.use_irqchip = 1;
743 r = 0;
744 break;
745 case KVM_CAP_S390_USER_SIGP:
746 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
747 kvm->arch.user_sigp = 1;
748 r = 0;
749 break;
750 case KVM_CAP_S390_VECTOR_REGISTERS:
751 mutex_lock(&kvm->lock);
752 if (kvm->created_vcpus) {
753 r = -EBUSY;
754 } else if (MACHINE_HAS_VX) {
755 set_kvm_facility(kvm->arch.model.fac_mask, 129);
756 set_kvm_facility(kvm->arch.model.fac_list, 129);
757 if (test_facility(134)) {
758 set_kvm_facility(kvm->arch.model.fac_mask, 134);
759 set_kvm_facility(kvm->arch.model.fac_list, 134);
760 }
761 if (test_facility(135)) {
762 set_kvm_facility(kvm->arch.model.fac_mask, 135);
763 set_kvm_facility(kvm->arch.model.fac_list, 135);
764 }
765 if (test_facility(148)) {
766 set_kvm_facility(kvm->arch.model.fac_mask, 148);
767 set_kvm_facility(kvm->arch.model.fac_list, 148);
768 }
769 if (test_facility(152)) {
770 set_kvm_facility(kvm->arch.model.fac_mask, 152);
771 set_kvm_facility(kvm->arch.model.fac_list, 152);
772 }
773 if (test_facility(192)) {
774 set_kvm_facility(kvm->arch.model.fac_mask, 192);
775 set_kvm_facility(kvm->arch.model.fac_list, 192);
776 }
777 r = 0;
778 } else
779 r = -EINVAL;
780 mutex_unlock(&kvm->lock);
781 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
782 r ? "(not available)" : "(success)");
783 break;
784 case KVM_CAP_S390_RI:
785 r = -EINVAL;
786 mutex_lock(&kvm->lock);
787 if (kvm->created_vcpus) {
788 r = -EBUSY;
789 } else if (test_facility(64)) {
790 set_kvm_facility(kvm->arch.model.fac_mask, 64);
791 set_kvm_facility(kvm->arch.model.fac_list, 64);
792 r = 0;
793 }
794 mutex_unlock(&kvm->lock);
795 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
796 r ? "(not available)" : "(success)");
797 break;
798 case KVM_CAP_S390_AIS:
799 mutex_lock(&kvm->lock);
800 if (kvm->created_vcpus) {
801 r = -EBUSY;
802 } else {
803 set_kvm_facility(kvm->arch.model.fac_mask, 72);
804 set_kvm_facility(kvm->arch.model.fac_list, 72);
805 r = 0;
806 }
807 mutex_unlock(&kvm->lock);
808 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
809 r ? "(not available)" : "(success)");
810 break;
811 case KVM_CAP_S390_GS:
812 r = -EINVAL;
813 mutex_lock(&kvm->lock);
814 if (kvm->created_vcpus) {
815 r = -EBUSY;
816 } else if (test_facility(133)) {
817 set_kvm_facility(kvm->arch.model.fac_mask, 133);
818 set_kvm_facility(kvm->arch.model.fac_list, 133);
819 r = 0;
820 }
821 mutex_unlock(&kvm->lock);
822 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
823 r ? "(not available)" : "(success)");
824 break;
825 case KVM_CAP_S390_HPAGE_1M:
826 mutex_lock(&kvm->lock);
827 if (kvm->created_vcpus)
828 r = -EBUSY;
829 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
830 r = -EINVAL;
831 else {
832 r = 0;
833 mmap_write_lock(kvm->mm);
834 kvm->mm->context.allow_gmap_hpage_1m = 1;
835 mmap_write_unlock(kvm->mm);
836 /*
837 * We might have to create fake 4k page
838 * tables. To prevent the hardware from working on
839 * stale PGSTEs, we emulate these instructions.
840 */
841 kvm->arch.use_skf = 0;
842 kvm->arch.use_pfmfi = 0;
843 }
844 mutex_unlock(&kvm->lock);
845 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
846 r ? "(not available)" : "(success)");
847 break;
848 case KVM_CAP_S390_USER_STSI:
849 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
850 kvm->arch.user_stsi = 1;
851 r = 0;
852 break;
853 case KVM_CAP_S390_USER_INSTR0:
854 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
855 kvm->arch.user_instr0 = 1;
856 icpt_operexc_on_all_vcpus(kvm);
857 r = 0;
858 break;
859 case KVM_CAP_S390_CPU_TOPOLOGY:
860 r = -EINVAL;
861 mutex_lock(&kvm->lock);
862 if (kvm->created_vcpus) {
863 r = -EBUSY;
864 } else if (test_facility(11)) {
865 set_kvm_facility(kvm->arch.model.fac_mask, 11);
866 set_kvm_facility(kvm->arch.model.fac_list, 11);
867 r = 0;
868 }
869 mutex_unlock(&kvm->lock);
870 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
871 r ? "(not available)" : "(success)");
872 break;
873 default:
874 r = -EINVAL;
875 break;
876 }
877 return r;
878 }
879
880 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
881 {
882 int ret;
883
884 switch (attr->attr) {
885 case KVM_S390_VM_MEM_LIMIT_SIZE:
886 ret = 0;
887 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
888 kvm->arch.mem_limit);
889 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
890 ret = -EFAULT;
891 break;
892 default:
893 ret = -ENXIO;
894 break;
895 }
896 return ret;
897 }
898
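/*
 * Handle the memory control attributes: enable CMMA, reset the CMMA
 * state, or set a new guest memory limit (which replaces the gmap).
 */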
899 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
900 {
901 int ret;
902 unsigned int idx;
903 switch (attr->attr) {
904 case KVM_S390_VM_MEM_ENABLE_CMMA:
905 ret = -ENXIO;
906 if (!sclp.has_cmma)
907 break;
908
909 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
910 mutex_lock(&kvm->lock);
911 if (kvm->created_vcpus)
912 ret = -EBUSY;
913 else if (kvm->mm->context.allow_gmap_hpage_1m)
914 ret = -EINVAL;
915 else {
916 kvm->arch.use_cmma = 1;
917 /* Not compatible with cmma. */
918 kvm->arch.use_pfmfi = 0;
919 ret = 0;
920 }
921 mutex_unlock(&kvm->lock);
922 break;
923 case KVM_S390_VM_MEM_CLR_CMMA:
924 ret = -ENXIO;
925 if (!sclp.has_cmma)
926 break;
927 ret = -EINVAL;
928 if (!kvm->arch.use_cmma)
929 break;
930
931 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
932 mutex_lock(&kvm->lock);
933 idx = srcu_read_lock(&kvm->srcu);
934 s390_reset_cmma(kvm->arch.gmap->mm);
935 srcu_read_unlock(&kvm->srcu, idx);
936 mutex_unlock(&kvm->lock);
937 ret = 0;
938 break;
939 case KVM_S390_VM_MEM_LIMIT_SIZE: {
940 unsigned long new_limit;
941
942 if (kvm_is_ucontrol(kvm))
943 return -EINVAL;
944
945 if (get_user(new_limit, (u64 __user *)attr->addr))
946 return -EFAULT;
947
948 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
949 new_limit > kvm->arch.mem_limit)
950 return -E2BIG;
951
952 if (!new_limit)
953 return -EINVAL;
954
955 /* gmap_create takes last usable address */
956 if (new_limit != KVM_S390_NO_MEM_LIMIT)
957 new_limit -= 1;
958
959 ret = -EBUSY;
960 mutex_lock(&kvm->lock);
961 if (!kvm->created_vcpus) {
962 /* gmap_create will round the limit up */
963 struct gmap *new = gmap_create(current->mm, new_limit);
964
965 if (!new) {
966 ret = -ENOMEM;
967 } else {
968 gmap_remove(kvm->arch.gmap);
969 new->private = kvm;
970 kvm->arch.gmap = new;
971 ret = 0;
972 }
973 }
974 mutex_unlock(&kvm->lock);
975 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
976 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
977 (void *) kvm->arch.gmap->asce);
978 break;
979 }
980 default:
981 ret = -ENXIO;
982 break;
983 }
984 return ret;
985 }
986
987 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
988
989 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
990 {
991 struct kvm_vcpu *vcpu;
992 unsigned long i;
993
994 kvm_s390_vcpu_block_all(kvm);
995
996 kvm_for_each_vcpu(i, vcpu, kvm) {
997 kvm_s390_vcpu_crypto_setup(vcpu);
998 /* recreate the shadow crycb by leaving the VSIE handler */
999 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1000 }
1001
1002 kvm_s390_vcpu_unblock_all(kvm);
1003 }
1004
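/*
 * Enable or disable AES/DEA key wrapping or AP instruction interpretation
 * (APIE) and propagate the new crypto configuration to all vcpus.
 */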
1005 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1006 {
1007 mutex_lock(&kvm->lock);
1008 switch (attr->attr) {
1009 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1010 if (!test_kvm_facility(kvm, 76)) {
1011 mutex_unlock(&kvm->lock);
1012 return -EINVAL;
1013 }
1014 get_random_bytes(
1015 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1016 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1017 kvm->arch.crypto.aes_kw = 1;
1018 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1019 break;
1020 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1021 if (!test_kvm_facility(kvm, 76)) {
1022 mutex_unlock(&kvm->lock);
1023 return -EINVAL;
1024 }
1025 get_random_bytes(
1026 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1027 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1028 kvm->arch.crypto.dea_kw = 1;
1029 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1030 break;
1031 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1032 if (!test_kvm_facility(kvm, 76)) {
1033 mutex_unlock(&kvm->lock);
1034 return -EINVAL;
1035 }
1036 kvm->arch.crypto.aes_kw = 0;
1037 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1038 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1039 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1040 break;
1041 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1042 if (!test_kvm_facility(kvm, 76)) {
1043 mutex_unlock(&kvm->lock);
1044 return -EINVAL;
1045 }
1046 kvm->arch.crypto.dea_kw = 0;
1047 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1048 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1049 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1050 break;
1051 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1052 if (!ap_instructions_available()) {
1053 mutex_unlock(&kvm->lock);
1054 return -EOPNOTSUPP;
1055 }
1056 kvm->arch.crypto.apie = 1;
1057 break;
1058 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1059 if (!ap_instructions_available()) {
1060 mutex_unlock(&kvm->lock);
1061 return -EOPNOTSUPP;
1062 }
1063 kvm->arch.crypto.apie = 0;
1064 break;
1065 default:
1066 mutex_unlock(&kvm->lock);
1067 return -ENXIO;
1068 }
1069
1070 kvm_s390_vcpu_crypto_reset_all(kvm);
1071 mutex_unlock(&kvm->lock);
1072 return 0;
1073 }
1074
1075 static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1076 {
1077 /* Only set the ECB bits after guest requests zPCI interpretation */
1078 if (!vcpu->kvm->arch.use_zpci_interp)
1079 return;
1080
1081 vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1082 vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1083 }
1084
1085 void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1086 {
1087 struct kvm_vcpu *vcpu;
1088 unsigned long i;
1089
1090 lockdep_assert_held(&kvm->lock);
1091
1092 if (!kvm_s390_pci_interp_allowed())
1093 return;
1094
1095 /*
1096 * If the host is configured for PCI and the necessary facilities are
1097 * available, turn on interpretation for the life of this guest.
1098 */
1099 kvm->arch.use_zpci_interp = 1;
1100
1101 kvm_s390_vcpu_block_all(kvm);
1102
1103 kvm_for_each_vcpu(i, vcpu, kvm) {
1104 kvm_s390_vcpu_pci_setup(vcpu);
1105 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1106 }
1107
1108 kvm_s390_vcpu_unblock_all(kvm);
1109 }
1110
1111 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1112 {
1113 unsigned long cx;
1114 struct kvm_vcpu *vcpu;
1115
1116 kvm_for_each_vcpu(cx, vcpu, kvm)
1117 kvm_s390_sync_request(req, vcpu);
1118 }
1119
1120 /*
1121 * Must be called with kvm->srcu held to avoid races on memslots, and with
1122 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1123 */
1124 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1125 {
1126 struct kvm_memory_slot *ms;
1127 struct kvm_memslots *slots;
1128 unsigned long ram_pages = 0;
1129 int bkt;
1130
1131 /* migration mode already enabled */
1132 if (kvm->arch.migration_mode)
1133 return 0;
1134 slots = kvm_memslots(kvm);
1135 if (!slots || kvm_memslots_empty(slots))
1136 return -EINVAL;
1137
1138 if (!kvm->arch.use_cmma) {
1139 kvm->arch.migration_mode = 1;
1140 return 0;
1141 }
1142 /* mark all the pages in active slots as dirty */
1143 kvm_for_each_memslot(ms, bkt, slots) {
1144 if (!ms->dirty_bitmap)
1145 return -EINVAL;
1146 /*
1147 * The second half of the bitmap is only used on x86,
1148 * and would be wasted otherwise, so we put it to good
1149 * use here to keep track of the state of the storage
1150 * attributes.
1151 */
1152 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1153 ram_pages += ms->npages;
1154 }
1155 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1156 kvm->arch.migration_mode = 1;
1157 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1158 return 0;
1159 }
1160
1161 /*
1162 * Must be called with kvm->slots_lock to avoid races with ourselves and
1163 * kvm_s390_vm_start_migration.
1164 */
1165 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1166 {
1167 /* migration mode already disabled */
1168 if (!kvm->arch.migration_mode)
1169 return 0;
1170 kvm->arch.migration_mode = 0;
1171 if (kvm->arch.use_cmma)
1172 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1173 return 0;
1174 }
1175
1176 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1177 struct kvm_device_attr *attr)
1178 {
1179 int res = -ENXIO;
1180
1181 mutex_lock(&kvm->slots_lock);
1182 switch (attr->attr) {
1183 case KVM_S390_VM_MIGRATION_START:
1184 res = kvm_s390_vm_start_migration(kvm);
1185 break;
1186 case KVM_S390_VM_MIGRATION_STOP:
1187 res = kvm_s390_vm_stop_migration(kvm);
1188 break;
1189 default:
1190 break;
1191 }
1192 mutex_unlock(&kvm->slots_lock);
1193
1194 return res;
1195 }
1196
1197 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1198 struct kvm_device_attr *attr)
1199 {
1200 u64 mig = kvm->arch.migration_mode;
1201
1202 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1203 return -ENXIO;
1204
1205 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1206 return -EFAULT;
1207 return 0;
1208 }
1209
1210 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1211
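/*
 * Set the guest TOD clock including the epoch index; a non-zero epoch
 * index is only accepted if the multiple-epoch facility (139) is active.
 */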
1212 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1213 {
1214 struct kvm_s390_vm_tod_clock gtod;
1215
1216 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1217 return -EFAULT;
1218
1219 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1220 return -EINVAL;
1221 __kvm_s390_set_tod_clock(kvm, &gtod);
1222
1223 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1224 gtod.epoch_idx, gtod.tod);
1225
1226 return 0;
1227 }
1228
1229 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1230 {
1231 u8 gtod_high;
1232
1233 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1234 sizeof(gtod_high)))
1235 return -EFAULT;
1236
1237 if (gtod_high != 0)
1238 return -EINVAL;
1239 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1240
1241 return 0;
1242 }
1243
1244 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1245 {
1246 struct kvm_s390_vm_tod_clock gtod = { 0 };
1247
1248 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1249 sizeof(gtod.tod)))
1250 return -EFAULT;
1251
1252 __kvm_s390_set_tod_clock(kvm, &gtod);
1253 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1254 return 0;
1255 }
1256
1257 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1258 {
1259 int ret;
1260
1261 if (attr->flags)
1262 return -EINVAL;
1263
1264 mutex_lock(&kvm->lock);
1265 /*
1266 * For protected guests, the TOD is managed by the ultravisor, so trying
1267 * to change it will never bring the expected results.
1268 */
1269 if (kvm_s390_pv_is_protected(kvm)) {
1270 ret = -EOPNOTSUPP;
1271 goto out_unlock;
1272 }
1273
1274 switch (attr->attr) {
1275 case KVM_S390_VM_TOD_EXT:
1276 ret = kvm_s390_set_tod_ext(kvm, attr);
1277 break;
1278 case KVM_S390_VM_TOD_HIGH:
1279 ret = kvm_s390_set_tod_high(kvm, attr);
1280 break;
1281 case KVM_S390_VM_TOD_LOW:
1282 ret = kvm_s390_set_tod_low(kvm, attr);
1283 break;
1284 default:
1285 ret = -ENXIO;
1286 break;
1287 }
1288
1289 out_unlock:
1290 mutex_unlock(&kvm->lock);
1291 return ret;
1292 }
1293
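/*
 * Compute the guest view of the TOD clock (host TOD plus the guest epoch),
 * carrying into the epoch index if the multiple-epoch facility is active.
 */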
1294 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1295 struct kvm_s390_vm_tod_clock *gtod)
1296 {
1297 union tod_clock clk;
1298
1299 preempt_disable();
1300
1301 store_tod_clock_ext(&clk);
1302
1303 gtod->tod = clk.tod + kvm->arch.epoch;
1304 gtod->epoch_idx = 0;
1305 if (test_kvm_facility(kvm, 139)) {
1306 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1307 if (gtod->tod < clk.tod)
1308 gtod->epoch_idx += 1;
1309 }
1310
1311 preempt_enable();
1312 }
1313
1314 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1315 {
1316 struct kvm_s390_vm_tod_clock gtod;
1317
1318 memset(&gtod, 0, sizeof(gtod));
1319 kvm_s390_get_tod_clock(kvm, &gtod);
1320 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1321 return -EFAULT;
1322
1323 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1324 gtod.epoch_idx, gtod.tod);
1325 return 0;
1326 }
1327
1328 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1329 {
1330 u8 gtod_high = 0;
1331
1332 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1333 sizeof(gtod_high)))
1334 return -EFAULT;
1335 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1336
1337 return 0;
1338 }
1339
1340 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1341 {
1342 u64 gtod;
1343
1344 gtod = kvm_s390_get_tod_clock_fast(kvm);
1345 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1346 return -EFAULT;
1347 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1348
1349 return 0;
1350 }
1351
1352 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1353 {
1354 int ret;
1355
1356 if (attr->flags)
1357 return -EINVAL;
1358
1359 switch (attr->attr) {
1360 case KVM_S390_VM_TOD_EXT:
1361 ret = kvm_s390_get_tod_ext(kvm, attr);
1362 break;
1363 case KVM_S390_VM_TOD_HIGH:
1364 ret = kvm_s390_get_tod_high(kvm, attr);
1365 break;
1366 case KVM_S390_VM_TOD_LOW:
1367 ret = kvm_s390_get_tod_low(kvm, attr);
1368 break;
1369 default:
1370 ret = -ENXIO;
1371 break;
1372 }
1373 return ret;
1374 }
1375
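/*
 * Set the guest CPU model (cpuid, IBC and facility list) from userspace.
 * The IBC value is clamped to the range supported by the host; changes
 * are only allowed before any vcpu has been created.
 */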
1376 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1377 {
1378 struct kvm_s390_vm_cpu_processor *proc;
1379 u16 lowest_ibc, unblocked_ibc;
1380 int ret = 0;
1381
1382 mutex_lock(&kvm->lock);
1383 if (kvm->created_vcpus) {
1384 ret = -EBUSY;
1385 goto out;
1386 }
1387 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1388 if (!proc) {
1389 ret = -ENOMEM;
1390 goto out;
1391 }
1392 if (!copy_from_user(proc, (void __user *)attr->addr,
1393 sizeof(*proc))) {
1394 kvm->arch.model.cpuid = proc->cpuid;
1395 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1396 unblocked_ibc = sclp.ibc & 0xfff;
1397 if (lowest_ibc && proc->ibc) {
1398 if (proc->ibc > unblocked_ibc)
1399 kvm->arch.model.ibc = unblocked_ibc;
1400 else if (proc->ibc < lowest_ibc)
1401 kvm->arch.model.ibc = lowest_ibc;
1402 else
1403 kvm->arch.model.ibc = proc->ibc;
1404 }
1405 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1406 S390_ARCH_FAC_LIST_SIZE_BYTE);
1407 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1408 kvm->arch.model.ibc,
1409 kvm->arch.model.cpuid);
1410 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1411 kvm->arch.model.fac_list[0],
1412 kvm->arch.model.fac_list[1],
1413 kvm->arch.model.fac_list[2]);
1414 } else
1415 ret = -EFAULT;
1416 kfree(proc);
1417 out:
1418 mutex_unlock(&kvm->lock);
1419 return ret;
1420 }
1421
1422 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1423 struct kvm_device_attr *attr)
1424 {
1425 struct kvm_s390_vm_cpu_feat data;
1426
1427 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1428 return -EFAULT;
1429 if (!bitmap_subset((unsigned long *) data.feat,
1430 kvm_s390_available_cpu_feat,
1431 KVM_S390_VM_CPU_FEAT_NR_BITS))
1432 return -EINVAL;
1433
1434 mutex_lock(&kvm->lock);
1435 if (kvm->created_vcpus) {
1436 mutex_unlock(&kvm->lock);
1437 return -EBUSY;
1438 }
1439 bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1440 mutex_unlock(&kvm->lock);
1441 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1442 data.feat[0],
1443 data.feat[1],
1444 data.feat[2]);
1445 return 0;
1446 }
1447
1448 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1449 struct kvm_device_attr *attr)
1450 {
1451 mutex_lock(&kvm->lock);
1452 if (kvm->created_vcpus) {
1453 mutex_unlock(&kvm->lock);
1454 return -EBUSY;
1455 }
1456
1457 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1458 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1459 mutex_unlock(&kvm->lock);
1460 return -EFAULT;
1461 }
1462 mutex_unlock(&kvm->lock);
1463
1464 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1465 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1466 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1467 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1468 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1469 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1470 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1471 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1472 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1473 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1474 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1475 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1476 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1477 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1478 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1479 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1480 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1481 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1482 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1483 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1484 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1485 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1486 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1487 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1488 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1489 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1490 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1491 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1492 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1493 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1494 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1495 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1496 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1497 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1498 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1499 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1500 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1501 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1502 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1503 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1504 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1505 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1506 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1507 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1508 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1509 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1510 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1511 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1512 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1513 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1514 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1515 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1516 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1517 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1518 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1519 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1520 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1521
1522 return 0;
1523 }
1524
1525 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1526 {
1527 int ret = -ENXIO;
1528
1529 switch (attr->attr) {
1530 case KVM_S390_VM_CPU_PROCESSOR:
1531 ret = kvm_s390_set_processor(kvm, attr);
1532 break;
1533 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1534 ret = kvm_s390_set_processor_feat(kvm, attr);
1535 break;
1536 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1537 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1538 break;
1539 }
1540 return ret;
1541 }
1542
1543 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1544 {
1545 struct kvm_s390_vm_cpu_processor *proc;
1546 int ret = 0;
1547
1548 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1549 if (!proc) {
1550 ret = -ENOMEM;
1551 goto out;
1552 }
1553 proc->cpuid = kvm->arch.model.cpuid;
1554 proc->ibc = kvm->arch.model.ibc;
1555 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1556 S390_ARCH_FAC_LIST_SIZE_BYTE);
1557 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1558 kvm->arch.model.ibc,
1559 kvm->arch.model.cpuid);
1560 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1561 kvm->arch.model.fac_list[0],
1562 kvm->arch.model.fac_list[1],
1563 kvm->arch.model.fac_list[2]);
1564 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1565 ret = -EFAULT;
1566 kfree(proc);
1567 out:
1568 return ret;
1569 }
1570
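/*
 * Report the host machine model: host cpuid, host IBC and the host
 * facility mask/list as seen by KVM.
 */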
1571 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1572 {
1573 struct kvm_s390_vm_cpu_machine *mach;
1574 int ret = 0;
1575
1576 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1577 if (!mach) {
1578 ret = -ENOMEM;
1579 goto out;
1580 }
1581 get_cpu_id((struct cpuid *) &mach->cpuid);
1582 mach->ibc = sclp.ibc;
1583 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1584 S390_ARCH_FAC_LIST_SIZE_BYTE);
1585 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1586 sizeof(stfle_fac_list));
1587 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1588 kvm->arch.model.ibc,
1589 kvm->arch.model.cpuid);
1590 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1591 mach->fac_mask[0],
1592 mach->fac_mask[1],
1593 mach->fac_mask[2]);
1594 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1595 mach->fac_list[0],
1596 mach->fac_list[1],
1597 mach->fac_list[2]);
1598 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1599 ret = -EFAULT;
1600 kfree(mach);
1601 out:
1602 return ret;
1603 }
1604
1605 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1606 struct kvm_device_attr *attr)
1607 {
1608 struct kvm_s390_vm_cpu_feat data;
1609
1610 bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1611 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1612 return -EFAULT;
1613 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1614 data.feat[0],
1615 data.feat[1],
1616 data.feat[2]);
1617 return 0;
1618 }
1619
1620 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1621 struct kvm_device_attr *attr)
1622 {
1623 struct kvm_s390_vm_cpu_feat data;
1624
1625 bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1626 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1627 return -EFAULT;
1628 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1629 data.feat[0],
1630 data.feat[1],
1631 data.feat[2]);
1632 return 0;
1633 }
1634
1635 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1636 struct kvm_device_attr *attr)
1637 {
1638 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1639 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1640 return -EFAULT;
1641
1642 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1643 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1644 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1645 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1646 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1647 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1648 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1649 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1650 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1651 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1652 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1653 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1654 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1655 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1656 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1657 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1658 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1659 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1660 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1661 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1662 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1663 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1664 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1665 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1666 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1667 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1668 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1669 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1670 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1671 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1672 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1673 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1674 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1675 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1676 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1677 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1678 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1679 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1680 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1681 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1682 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1683 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1684 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1685 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1686 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1687 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1688 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1689 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1690 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1691 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1692 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1693 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1694 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1695 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1696 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1697 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1698 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1699
1700 return 0;
1701 }
1702
kvm_s390_get_machine_subfunc(struct kvm * kvm,struct kvm_device_attr * attr)1703 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1704 struct kvm_device_attr *attr)
1705 {
1706 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1707 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1708 return -EFAULT;
1709
1710 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1711 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1712 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1713 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1714 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1715 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1716 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1717 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1718 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1719 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1720 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1721 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1722 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1723 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1724 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1725 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1726 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1727 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1728 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1729 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1730 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1731 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1732 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1733 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1734 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1735 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1736 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1737 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1738 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1739 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1740 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1741 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1742 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1743 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1744 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1745 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1746 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1747 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1748 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1749 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1750 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1751 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1752 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1753 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1754 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1755 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1756 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1757 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1758 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1759 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1760 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1761 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1762 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1763 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1764 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1765 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1766 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1767
1768 return 0;
1769 }
1770
kvm_s390_get_cpu_model(struct kvm * kvm,struct kvm_device_attr * attr)1771 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1772 {
1773 int ret = -ENXIO;
1774
1775 switch (attr->attr) {
1776 case KVM_S390_VM_CPU_PROCESSOR:
1777 ret = kvm_s390_get_processor(kvm, attr);
1778 break;
1779 case KVM_S390_VM_CPU_MACHINE:
1780 ret = kvm_s390_get_machine(kvm, attr);
1781 break;
1782 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1783 ret = kvm_s390_get_processor_feat(kvm, attr);
1784 break;
1785 case KVM_S390_VM_CPU_MACHINE_FEAT:
1786 ret = kvm_s390_get_machine_feat(kvm, attr);
1787 break;
1788 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1789 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1790 break;
1791 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1792 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1793 break;
1794 }
1795 return ret;
1796 }
1797
1798 /**
1799 * kvm_s390_update_topology_change_report - update CPU topology change report
1800 * @kvm: guest KVM description
1801 * @val: set or clear the MTCR bit
1802 *
1803 * Updates the Multiprocessor Topology-Change-Report bit to signal
1804 * the guest with a topology change.
1805 * This is only relevant if the topology facility is present.
1806 *
1807 * The SCA version, bsca or esca, doesn't matter as offset is the same.
1808 */
kvm_s390_update_topology_change_report(struct kvm * kvm,bool val)1809 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1810 {
1811 union sca_utility new, old;
1812 struct bsca_block *sca;
1813
1814 read_lock(&kvm->arch.sca_lock);
1815 sca = kvm->arch.sca;
1816 do {
1817 old = READ_ONCE(sca->utility);
1818 new = old;
1819 new.mtcr = val;
1820 } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
1821 read_unlock(&kvm->arch.sca_lock);
1822 }
1823
kvm_s390_set_topo_change_indication(struct kvm * kvm,struct kvm_device_attr * attr)1824 static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1825 struct kvm_device_attr *attr)
1826 {
1827 if (!test_kvm_facility(kvm, 11))
1828 return -ENXIO;
1829
1830 kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1831 return 0;
1832 }
1833
kvm_s390_get_topo_change_indication(struct kvm * kvm,struct kvm_device_attr * attr)1834 static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1835 struct kvm_device_attr *attr)
1836 {
1837 u8 topo;
1838
1839 if (!test_kvm_facility(kvm, 11))
1840 return -ENXIO;
1841
1842 read_lock(&kvm->arch.sca_lock);
1843 topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1844 read_unlock(&kvm->arch.sca_lock);
1845
1846 return put_user(topo, (u8 __user *)attr->addr);
1847 }
1848
kvm_s390_vm_set_attr(struct kvm * kvm,struct kvm_device_attr * attr)1849 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1850 {
1851 int ret;
1852
1853 switch (attr->group) {
1854 case KVM_S390_VM_MEM_CTRL:
1855 ret = kvm_s390_set_mem_control(kvm, attr);
1856 break;
1857 case KVM_S390_VM_TOD:
1858 ret = kvm_s390_set_tod(kvm, attr);
1859 break;
1860 case KVM_S390_VM_CPU_MODEL:
1861 ret = kvm_s390_set_cpu_model(kvm, attr);
1862 break;
1863 case KVM_S390_VM_CRYPTO:
1864 ret = kvm_s390_vm_set_crypto(kvm, attr);
1865 break;
1866 case KVM_S390_VM_MIGRATION:
1867 ret = kvm_s390_vm_set_migration(kvm, attr);
1868 break;
1869 case KVM_S390_VM_CPU_TOPOLOGY:
1870 ret = kvm_s390_set_topo_change_indication(kvm, attr);
1871 break;
1872 default:
1873 ret = -ENXIO;
1874 break;
1875 }
1876
1877 return ret;
1878 }
1879
kvm_s390_vm_get_attr(struct kvm * kvm,struct kvm_device_attr * attr)1880 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1881 {
1882 int ret;
1883
1884 switch (attr->group) {
1885 case KVM_S390_VM_MEM_CTRL:
1886 ret = kvm_s390_get_mem_control(kvm, attr);
1887 break;
1888 case KVM_S390_VM_TOD:
1889 ret = kvm_s390_get_tod(kvm, attr);
1890 break;
1891 case KVM_S390_VM_CPU_MODEL:
1892 ret = kvm_s390_get_cpu_model(kvm, attr);
1893 break;
1894 case KVM_S390_VM_MIGRATION:
1895 ret = kvm_s390_vm_get_migration(kvm, attr);
1896 break;
1897 case KVM_S390_VM_CPU_TOPOLOGY:
1898 ret = kvm_s390_get_topo_change_indication(kvm, attr);
1899 break;
1900 default:
1901 ret = -ENXIO;
1902 break;
1903 }
1904
1905 return ret;
1906 }
1907
kvm_s390_vm_has_attr(struct kvm * kvm,struct kvm_device_attr * attr)1908 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1909 {
1910 int ret;
1911
1912 switch (attr->group) {
1913 case KVM_S390_VM_MEM_CTRL:
1914 switch (attr->attr) {
1915 case KVM_S390_VM_MEM_ENABLE_CMMA:
1916 case KVM_S390_VM_MEM_CLR_CMMA:
1917 ret = sclp.has_cmma ? 0 : -ENXIO;
1918 break;
1919 case KVM_S390_VM_MEM_LIMIT_SIZE:
1920 ret = 0;
1921 break;
1922 default:
1923 ret = -ENXIO;
1924 break;
1925 }
1926 break;
1927 case KVM_S390_VM_TOD:
1928 switch (attr->attr) {
1929 case KVM_S390_VM_TOD_LOW:
1930 case KVM_S390_VM_TOD_HIGH:
1931 ret = 0;
1932 break;
1933 default:
1934 ret = -ENXIO;
1935 break;
1936 }
1937 break;
1938 case KVM_S390_VM_CPU_MODEL:
1939 switch (attr->attr) {
1940 case KVM_S390_VM_CPU_PROCESSOR:
1941 case KVM_S390_VM_CPU_MACHINE:
1942 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1943 case KVM_S390_VM_CPU_MACHINE_FEAT:
1944 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1945 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1946 ret = 0;
1947 break;
1948 default:
1949 ret = -ENXIO;
1950 break;
1951 }
1952 break;
1953 case KVM_S390_VM_CRYPTO:
1954 switch (attr->attr) {
1955 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1956 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1957 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1958 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1959 ret = 0;
1960 break;
1961 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1962 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1963 ret = ap_instructions_available() ? 0 : -ENXIO;
1964 break;
1965 default:
1966 ret = -ENXIO;
1967 break;
1968 }
1969 break;
1970 case KVM_S390_VM_MIGRATION:
1971 ret = 0;
1972 break;
1973 case KVM_S390_VM_CPU_TOPOLOGY:
1974 ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
1975 break;
1976 default:
1977 ret = -ENXIO;
1978 break;
1979 }
1980
1981 return ret;
1982 }
1983
kvm_s390_get_skeys(struct kvm * kvm,struct kvm_s390_skeys * args)1984 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1985 {
1986 uint8_t *keys;
1987 uint64_t hva;
1988 int srcu_idx, i, r = 0;
1989
1990 if (args->flags != 0)
1991 return -EINVAL;
1992
1993 /* Is this guest using storage keys? */
1994 if (!mm_uses_skeys(current->mm))
1995 return KVM_S390_GET_SKEYS_NONE;
1996
1997 /* Enforce sane limit on memory allocation */
1998 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1999 return -EINVAL;
2000
2001 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2002 if (!keys)
2003 return -ENOMEM;
2004
2005 mmap_read_lock(current->mm);
2006 srcu_idx = srcu_read_lock(&kvm->srcu);
2007 for (i = 0; i < args->count; i++) {
2008 hva = gfn_to_hva(kvm, args->start_gfn + i);
2009 if (kvm_is_error_hva(hva)) {
2010 r = -EFAULT;
2011 break;
2012 }
2013
2014 r = get_guest_storage_key(current->mm, hva, &keys[i]);
2015 if (r)
2016 break;
2017 }
2018 srcu_read_unlock(&kvm->srcu, srcu_idx);
2019 mmap_read_unlock(current->mm);
2020
2021 if (!r) {
2022 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2023 sizeof(uint8_t) * args->count);
2024 if (r)
2025 r = -EFAULT;
2026 }
2027
2028 kvfree(keys);
2029 return r;
2030 }
2031
kvm_s390_set_skeys(struct kvm * kvm,struct kvm_s390_skeys * args)2032 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2033 {
2034 uint8_t *keys;
2035 uint64_t hva;
2036 int srcu_idx, i, r = 0;
2037 bool unlocked;
2038
2039 if (args->flags != 0)
2040 return -EINVAL;
2041
2042 /* Enforce sane limit on memory allocation */
2043 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2044 return -EINVAL;
2045
2046 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2047 if (!keys)
2048 return -ENOMEM;
2049
2050 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2051 sizeof(uint8_t) * args->count);
2052 if (r) {
2053 r = -EFAULT;
2054 goto out;
2055 }
2056
2057 /* Enable storage key handling for the guest */
2058 r = s390_enable_skey();
2059 if (r)
2060 goto out;
2061
2062 i = 0;
2063 mmap_read_lock(current->mm);
2064 srcu_idx = srcu_read_lock(&kvm->srcu);
2065 while (i < args->count) {
2066 unlocked = false;
2067 hva = gfn_to_hva(kvm, args->start_gfn + i);
2068 if (kvm_is_error_hva(hva)) {
2069 r = -EFAULT;
2070 break;
2071 }
2072
2073 /* Lowest order bit is reserved */
2074 if (keys[i] & 0x01) {
2075 r = -EINVAL;
2076 break;
2077 }
2078
2079 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2080 if (r) {
2081 r = fixup_user_fault(current->mm, hva,
2082 FAULT_FLAG_WRITE, &unlocked);
2083 if (r)
2084 break;
2085 }
2086 if (!r)
2087 i++;
2088 }
2089 srcu_read_unlock(&kvm->srcu, srcu_idx);
2090 mmap_read_unlock(current->mm);
2091 out:
2092 kvfree(keys);
2093 return r;
2094 }
2095
2096 /*
2097 * Base address and length must be sent at the start of each block, therefore
2098 * it's cheaper to send some clean data, as long as it's less than the size of
2099 * two longs.
2100 */
2101 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2102 /* for consistency */
2103 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
2104
kvm_s390_peek_cmma(struct kvm * kvm,struct kvm_s390_cmma_log * args,u8 * res,unsigned long bufsize)2105 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2106 u8 *res, unsigned long bufsize)
2107 {
2108 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2109
2110 args->count = 0;
2111 while (args->count < bufsize) {
2112 hva = gfn_to_hva(kvm, cur_gfn);
2113 /*
2114 * We return an error if the first value was invalid, but we
2115 * return successfully if at least one value was copied.
2116 */
2117 if (kvm_is_error_hva(hva))
2118 return args->count ? 0 : -EFAULT;
2119 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2120 pgstev = 0;
2121 res[args->count++] = (pgstev >> 24) & 0x43;
2122 cur_gfn++;
2123 }
2124
2125 return 0;
2126 }
2127
gfn_to_memslot_approx(struct kvm_memslots * slots,gfn_t gfn)2128 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2129 gfn_t gfn)
2130 {
2131 return ____gfn_to_memslot(slots, gfn, true);
2132 }
2133
kvm_s390_next_dirty_cmma(struct kvm_memslots * slots,unsigned long cur_gfn)2134 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2135 unsigned long cur_gfn)
2136 {
2137 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2138 unsigned long ofs = cur_gfn - ms->base_gfn;
2139 struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2140
2141 if (ms->base_gfn + ms->npages <= cur_gfn) {
2142 mnode = rb_next(mnode);
2143 /* If we are above the highest slot, wrap around */
2144 if (!mnode)
2145 mnode = rb_first(&slots->gfn_tree);
2146
2147 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2148 ofs = 0;
2149 }
2150 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2151 while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2152 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2153 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2154 }
2155 return ms->base_gfn + ofs;
2156 }
2157
kvm_s390_get_cmma(struct kvm * kvm,struct kvm_s390_cmma_log * args,u8 * res,unsigned long bufsize)2158 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2159 u8 *res, unsigned long bufsize)
2160 {
2161 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2162 struct kvm_memslots *slots = kvm_memslots(kvm);
2163 struct kvm_memory_slot *ms;
2164
2165 if (unlikely(kvm_memslots_empty(slots)))
2166 return 0;
2167
2168 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2169 ms = gfn_to_memslot(kvm, cur_gfn);
2170 args->count = 0;
2171 args->start_gfn = cur_gfn;
2172 if (!ms)
2173 return 0;
2174 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2175 mem_end = kvm_s390_get_gfn_end(slots);
2176
2177 while (args->count < bufsize) {
2178 hva = gfn_to_hva(kvm, cur_gfn);
2179 if (kvm_is_error_hva(hva))
2180 return 0;
2181 /* Decrement only if we actually flipped the bit to 0 */
2182 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2183 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2184 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2185 pgstev = 0;
2186 /* Save the value */
2187 res[args->count++] = (pgstev >> 24) & 0x43;
2188 /* If the next bit is too far away, stop. */
2189 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2190 return 0;
2191 /* If we reached the previous "next", find the next one */
2192 if (cur_gfn == next_gfn)
2193 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2194 /* Reached the end of memory or of the buffer, stop */
2195 if ((next_gfn >= mem_end) ||
2196 (next_gfn - args->start_gfn >= bufsize))
2197 return 0;
2198 cur_gfn++;
2199 /* Reached the end of the current memslot, take the next one. */
2200 if (cur_gfn - ms->base_gfn >= ms->npages) {
2201 ms = gfn_to_memslot(kvm, cur_gfn);
2202 if (!ms)
2203 return 0;
2204 }
2205 }
2206 return 0;
2207 }
2208
2209 /*
2210 * This function searches for the next page with dirty CMMA attributes, and
2211 * saves the attributes in the buffer up to either the end of the buffer or
2212 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2213 * no trailing clean bytes are saved.
2214 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2215 * output buffer will indicate 0 as length.
2216 */
kvm_s390_get_cmma_bits(struct kvm * kvm,struct kvm_s390_cmma_log * args)2217 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2218 struct kvm_s390_cmma_log *args)
2219 {
2220 unsigned long bufsize;
2221 int srcu_idx, peek, ret;
2222 u8 *values;
2223
2224 if (!kvm->arch.use_cmma)
2225 return -ENXIO;
2226 /* Invalid/unsupported flags were specified */
2227 if (args->flags & ~KVM_S390_CMMA_PEEK)
2228 return -EINVAL;
2229 /* Migration mode query, and we are not doing a migration */
2230 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2231 if (!peek && !kvm->arch.migration_mode)
2232 return -EINVAL;
2233 /* CMMA is disabled or was not used, or the buffer has length zero */
2234 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2235 if (!bufsize || !kvm->mm->context.uses_cmm) {
2236 memset(args, 0, sizeof(*args));
2237 return 0;
2238 }
2239 /* We are not peeking, and there are no dirty pages */
2240 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2241 memset(args, 0, sizeof(*args));
2242 return 0;
2243 }
2244
2245 values = vmalloc(bufsize);
2246 if (!values)
2247 return -ENOMEM;
2248
2249 mmap_read_lock(kvm->mm);
2250 srcu_idx = srcu_read_lock(&kvm->srcu);
2251 if (peek)
2252 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2253 else
2254 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2255 srcu_read_unlock(&kvm->srcu, srcu_idx);
2256 mmap_read_unlock(kvm->mm);
2257
2258 if (kvm->arch.migration_mode)
2259 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2260 else
2261 args->remaining = 0;
2262
2263 if (copy_to_user((void __user *)args->values, values, args->count))
2264 ret = -EFAULT;
2265
2266 vfree(values);
2267 return ret;
2268 }
2269
2270 /*
2271 * This function sets the CMMA attributes for the given pages. If the input
2272 * buffer has zero length, no action is taken, otherwise the attributes are
2273 * set and the mm->context.uses_cmm flag is set.
2274 */
kvm_s390_set_cmma_bits(struct kvm * kvm,const struct kvm_s390_cmma_log * args)2275 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2276 const struct kvm_s390_cmma_log *args)
2277 {
2278 unsigned long hva, mask, pgstev, i;
2279 uint8_t *bits;
2280 int srcu_idx, r = 0;
2281
2282 mask = args->mask;
2283
2284 if (!kvm->arch.use_cmma)
2285 return -ENXIO;
2286 /* invalid/unsupported flags */
2287 if (args->flags != 0)
2288 return -EINVAL;
2289 /* Enforce sane limit on memory allocation */
2290 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2291 return -EINVAL;
2292 /* Nothing to do */
2293 if (args->count == 0)
2294 return 0;
2295
2296 bits = vmalloc(array_size(sizeof(*bits), args->count));
2297 if (!bits)
2298 return -ENOMEM;
2299
2300 r = copy_from_user(bits, (void __user *)args->values, args->count);
2301 if (r) {
2302 r = -EFAULT;
2303 goto out;
2304 }
2305
2306 mmap_read_lock(kvm->mm);
2307 srcu_idx = srcu_read_lock(&kvm->srcu);
2308 for (i = 0; i < args->count; i++) {
2309 hva = gfn_to_hva(kvm, args->start_gfn + i);
2310 if (kvm_is_error_hva(hva)) {
2311 r = -EFAULT;
2312 break;
2313 }
2314
2315 pgstev = bits[i];
2316 pgstev = pgstev << 24;
2317 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2318 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2319 }
2320 srcu_read_unlock(&kvm->srcu, srcu_idx);
2321 mmap_read_unlock(kvm->mm);
2322
2323 if (!kvm->mm->context.uses_cmm) {
2324 mmap_write_lock(kvm->mm);
2325 kvm->mm->context.uses_cmm = 1;
2326 mmap_write_unlock(kvm->mm);
2327 }
2328 out:
2329 vfree(bits);
2330 return r;
2331 }
2332
2333 /**
2334 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2335 * non protected.
2336 * @kvm: the VM whose protected vCPUs are to be converted
2337 * @rc: return value for the RC field of the UVC (in case of error)
2338 * @rrc: return value for the RRC field of the UVC (in case of error)
2339 *
2340 * Does not stop in case of error, tries to convert as many
2341 * CPUs as possible. In case of error, the RC and RRC of the last error are
2342 * returned.
2343 *
2344 * Return: 0 in case of success, otherwise -EIO
2345 */
kvm_s390_cpus_from_pv(struct kvm * kvm,u16 * rc,u16 * rrc)2346 int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2347 {
2348 struct kvm_vcpu *vcpu;
2349 unsigned long i;
2350 u16 _rc, _rrc;
2351 int ret = 0;
2352
2353 /*
2354 * We ignore failures and try to destroy as many CPUs as possible.
2355 * At the same time we must not free the assigned resources when
2356 * this fails, as the ultravisor has still access to that memory.
2357 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2358 * behind.
2359 * We want to return the first failure rc and rrc, though.
2360 */
2361 kvm_for_each_vcpu(i, vcpu, kvm) {
2362 mutex_lock(&vcpu->mutex);
2363 if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2364 *rc = _rc;
2365 *rrc = _rrc;
2366 ret = -EIO;
2367 }
2368 mutex_unlock(&vcpu->mutex);
2369 }
2370 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2371 if (use_gisa)
2372 kvm_s390_gisa_enable(kvm);
2373 return ret;
2374 }
2375
2376 /**
2377 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2378 * to protected.
2379 * @kvm: the VM whose protected vCPUs are to be converted
2380 * @rc: return value for the RC field of the UVC (in case of error)
2381 * @rrc: return value for the RRC field of the UVC (in case of error)
2382 *
2383 * Tries to undo the conversion in case of error.
2384 *
2385 * Return: 0 in case of success, otherwise -EIO
2386 */
kvm_s390_cpus_to_pv(struct kvm * kvm,u16 * rc,u16 * rrc)2387 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2388 {
2389 unsigned long i;
2390 int r = 0;
2391 u16 dummy;
2392
2393 struct kvm_vcpu *vcpu;
2394
2395 /* Disable the GISA if the ultravisor does not support AIV. */
2396 if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2397 kvm_s390_gisa_disable(kvm);
2398
2399 kvm_for_each_vcpu(i, vcpu, kvm) {
2400 mutex_lock(&vcpu->mutex);
2401 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2402 mutex_unlock(&vcpu->mutex);
2403 if (r)
2404 break;
2405 }
2406 if (r)
2407 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2408 return r;
2409 }
2410
2411 /*
2412 * Here we provide user space with a direct interface to query UV
2413 * related data like UV maxima and available features as well as
2414 * feature specific data.
2415 *
2416 * To facilitate future extension of the data structures we'll try to
2417 * write data up to the maximum requested length.
2418 */
kvm_s390_handle_pv_info(struct kvm_s390_pv_info * info)2419 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2420 {
2421 ssize_t len_min;
2422
2423 switch (info->header.id) {
2424 case KVM_PV_INFO_VM: {
2425 len_min = sizeof(info->header) + sizeof(info->vm);
2426
2427 if (info->header.len_max < len_min)
2428 return -EINVAL;
2429
2430 memcpy(info->vm.inst_calls_list,
2431 uv_info.inst_calls_list,
2432 sizeof(uv_info.inst_calls_list));
2433
2434 /* It's max cpuid not max cpus, so it's off by one */
2435 info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2436 info->vm.max_guests = uv_info.max_num_sec_conf;
2437 info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2438 info->vm.feature_indication = uv_info.uv_feature_indications;
2439
2440 return len_min;
2441 }
2442 case KVM_PV_INFO_DUMP: {
2443 len_min = sizeof(info->header) + sizeof(info->dump);
2444
2445 if (info->header.len_max < len_min)
2446 return -EINVAL;
2447
2448 info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2449 info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2450 info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2451 return len_min;
2452 }
2453 default:
2454 return -EINVAL;
2455 }
2456 }
2457
kvm_s390_pv_dmp(struct kvm * kvm,struct kvm_pv_cmd * cmd,struct kvm_s390_pv_dmp dmp)2458 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2459 struct kvm_s390_pv_dmp dmp)
2460 {
2461 int r = -EINVAL;
2462 void __user *result_buff = (void __user *)dmp.buff_addr;
2463
2464 switch (dmp.subcmd) {
2465 case KVM_PV_DUMP_INIT: {
2466 if (kvm->arch.pv.dumping)
2467 break;
2468
2469 /*
2470 * Block SIE entry as concurrent dump UVCs could lead
2471 * to validities.
2472 */
2473 kvm_s390_vcpu_block_all(kvm);
2474
2475 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2476 UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2477 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2478 cmd->rc, cmd->rrc);
2479 if (!r) {
2480 kvm->arch.pv.dumping = true;
2481 } else {
2482 kvm_s390_vcpu_unblock_all(kvm);
2483 r = -EINVAL;
2484 }
2485 break;
2486 }
2487 case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2488 if (!kvm->arch.pv.dumping)
2489 break;
2490
2491 /*
2492 * gaddr is an output parameter since we might stop
2493 * early. As dmp will be copied back in our caller, we
2494 * don't need to do it ourselves.
2495 */
2496 r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2497 &cmd->rc, &cmd->rrc);
2498 break;
2499 }
2500 case KVM_PV_DUMP_COMPLETE: {
2501 if (!kvm->arch.pv.dumping)
2502 break;
2503
2504 r = -EINVAL;
2505 if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2506 break;
2507
2508 r = kvm_s390_pv_dump_complete(kvm, result_buff,
2509 &cmd->rc, &cmd->rrc);
2510 break;
2511 }
2512 default:
2513 r = -ENOTTY;
2514 break;
2515 }
2516
2517 return r;
2518 }
2519
kvm_s390_handle_pv(struct kvm * kvm,struct kvm_pv_cmd * cmd)2520 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2521 {
2522 int r = 0;
2523 u16 dummy;
2524 void __user *argp = (void __user *)cmd->data;
2525
2526 switch (cmd->cmd) {
2527 case KVM_PV_ENABLE: {
2528 r = -EINVAL;
2529 if (kvm_s390_pv_is_protected(kvm))
2530 break;
2531
2532 /*
2533 * FMT 4 SIE needs esca. As we never switch back to bsca from
2534 * esca, we need no cleanup in the error cases below
2535 */
2536 r = sca_switch_to_extended(kvm);
2537 if (r)
2538 break;
2539
2540 mmap_write_lock(current->mm);
2541 r = gmap_mark_unmergeable();
2542 mmap_write_unlock(current->mm);
2543 if (r)
2544 break;
2545
2546 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2547 if (r)
2548 break;
2549
2550 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2551 if (r)
2552 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2553
2554 /* we need to block service interrupts from now on */
2555 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2556 break;
2557 }
2558 case KVM_PV_DISABLE: {
2559 r = -EINVAL;
2560 if (!kvm_s390_pv_is_protected(kvm))
2561 break;
2562
2563 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2564 /*
2565 * If a CPU could not be destroyed, destroy VM will also fail.
2566 * There is no point in trying to destroy it. Instead return
2567 * the rc and rrc from the first CPU that failed destroying.
2568 */
2569 if (r)
2570 break;
2571 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2572
2573 /* no need to block service interrupts any more */
2574 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2575 break;
2576 }
2577 case KVM_PV_SET_SEC_PARMS: {
2578 struct kvm_s390_pv_sec_parm parms = {};
2579 void *hdr;
2580
2581 r = -EINVAL;
2582 if (!kvm_s390_pv_is_protected(kvm))
2583 break;
2584
2585 r = -EFAULT;
2586 if (copy_from_user(&parms, argp, sizeof(parms)))
2587 break;
2588
2589 /* Currently restricted to 8KB */
2590 r = -EINVAL;
2591 if (parms.length > PAGE_SIZE * 2)
2592 break;
2593
2594 r = -ENOMEM;
2595 hdr = vmalloc(parms.length);
2596 if (!hdr)
2597 break;
2598
2599 r = -EFAULT;
2600 if (!copy_from_user(hdr, (void __user *)parms.origin,
2601 parms.length))
2602 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2603 &cmd->rc, &cmd->rrc);
2604
2605 vfree(hdr);
2606 break;
2607 }
2608 case KVM_PV_UNPACK: {
2609 struct kvm_s390_pv_unp unp = {};
2610
2611 r = -EINVAL;
2612 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2613 break;
2614
2615 r = -EFAULT;
2616 if (copy_from_user(&unp, argp, sizeof(unp)))
2617 break;
2618
2619 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2620 &cmd->rc, &cmd->rrc);
2621 break;
2622 }
2623 case KVM_PV_VERIFY: {
2624 r = -EINVAL;
2625 if (!kvm_s390_pv_is_protected(kvm))
2626 break;
2627
2628 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2629 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2630 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2631 cmd->rrc);
2632 break;
2633 }
2634 case KVM_PV_PREP_RESET: {
2635 r = -EINVAL;
2636 if (!kvm_s390_pv_is_protected(kvm))
2637 break;
2638
2639 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2640 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2641 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2642 cmd->rc, cmd->rrc);
2643 break;
2644 }
2645 case KVM_PV_UNSHARE_ALL: {
2646 r = -EINVAL;
2647 if (!kvm_s390_pv_is_protected(kvm))
2648 break;
2649
2650 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2651 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2652 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2653 cmd->rc, cmd->rrc);
2654 break;
2655 }
2656 case KVM_PV_INFO: {
2657 struct kvm_s390_pv_info info = {};
2658 ssize_t data_len;
2659
2660 /*
2661 * No need to check the VM protection here.
2662 *
2663 * Maybe user space wants to query some of the data
2664 * when the VM is still unprotected. If we see the
2665 * need to fence a new data command we can still
2666 * return an error in the info handler.
2667 */
2668
2669 r = -EFAULT;
2670 if (copy_from_user(&info, argp, sizeof(info.header)))
2671 break;
2672
2673 r = -EINVAL;
2674 if (info.header.len_max < sizeof(info.header))
2675 break;
2676
2677 data_len = kvm_s390_handle_pv_info(&info);
2678 if (data_len < 0) {
2679 r = data_len;
2680 break;
2681 }
2682 /*
2683 * If a data command struct is extended (multiple
2684 * times) this can be used to determine how much of it
2685 * is valid.
2686 */
2687 info.header.len_written = data_len;
2688
2689 r = -EFAULT;
2690 if (copy_to_user(argp, &info, data_len))
2691 break;
2692
2693 r = 0;
2694 break;
2695 }
2696 case KVM_PV_DUMP: {
2697 struct kvm_s390_pv_dmp dmp;
2698
2699 r = -EINVAL;
2700 if (!kvm_s390_pv_is_protected(kvm))
2701 break;
2702
2703 r = -EFAULT;
2704 if (copy_from_user(&dmp, argp, sizeof(dmp)))
2705 break;
2706
2707 r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2708 if (r)
2709 break;
2710
2711 if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2712 r = -EFAULT;
2713 break;
2714 }
2715
2716 break;
2717 }
2718 default:
2719 r = -ENOTTY;
2720 }
2721 return r;
2722 }
2723
access_key_invalid(u8 access_key)2724 static bool access_key_invalid(u8 access_key)
2725 {
2726 return access_key > 0xf;
2727 }
2728
kvm_s390_vm_mem_op(struct kvm * kvm,struct kvm_s390_mem_op * mop)2729 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2730 {
2731 void __user *uaddr = (void __user *)mop->buf;
2732 u64 supported_flags;
2733 void *tmpbuf = NULL;
2734 int r, srcu_idx;
2735
2736 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2737 | KVM_S390_MEMOP_F_CHECK_ONLY;
2738 if (mop->flags & ~supported_flags || !mop->size)
2739 return -EINVAL;
2740 if (mop->size > MEM_OP_MAX_SIZE)
2741 return -E2BIG;
2742 /*
2743 * This is technically a heuristic only, if the kvm->lock is not
2744 * taken, it is not guaranteed that the vm is/remains non-protected.
2745 * This is ok from a kernel perspective, wrongdoing is detected
2746 * on the access, -EFAULT is returned and the vm may crash the
2747 * next time it accesses the memory in question.
2748 * There is no sane usecase to do switching and a memop on two
2749 * different CPUs at the same time.
2750 */
2751 if (kvm_s390_pv_get_handle(kvm))
2752 return -EINVAL;
2753 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2754 if (access_key_invalid(mop->key))
2755 return -EINVAL;
2756 } else {
2757 mop->key = 0;
2758 }
2759 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2760 tmpbuf = vmalloc(mop->size);
2761 if (!tmpbuf)
2762 return -ENOMEM;
2763 }
2764
2765 srcu_idx = srcu_read_lock(&kvm->srcu);
2766
2767 if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2768 r = PGM_ADDRESSING;
2769 goto out_unlock;
2770 }
2771
2772 switch (mop->op) {
2773 case KVM_S390_MEMOP_ABSOLUTE_READ: {
2774 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2775 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2776 } else {
2777 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2778 mop->size, GACC_FETCH, mop->key);
2779 if (r == 0) {
2780 if (copy_to_user(uaddr, tmpbuf, mop->size))
2781 r = -EFAULT;
2782 }
2783 }
2784 break;
2785 }
2786 case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2787 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2788 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2789 } else {
2790 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2791 r = -EFAULT;
2792 break;
2793 }
2794 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2795 mop->size, GACC_STORE, mop->key);
2796 }
2797 break;
2798 }
2799 default:
2800 r = -EINVAL;
2801 }
2802
2803 out_unlock:
2804 srcu_read_unlock(&kvm->srcu, srcu_idx);
2805
2806 vfree(tmpbuf);
2807 return r;
2808 }
2809
kvm_arch_vm_ioctl(struct file * filp,unsigned int ioctl,unsigned long arg)2810 long kvm_arch_vm_ioctl(struct file *filp,
2811 unsigned int ioctl, unsigned long arg)
2812 {
2813 struct kvm *kvm = filp->private_data;
2814 void __user *argp = (void __user *)arg;
2815 struct kvm_device_attr attr;
2816 int r;
2817
2818 switch (ioctl) {
2819 case KVM_S390_INTERRUPT: {
2820 struct kvm_s390_interrupt s390int;
2821
2822 r = -EFAULT;
2823 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2824 break;
2825 r = kvm_s390_inject_vm(kvm, &s390int);
2826 break;
2827 }
2828 case KVM_CREATE_IRQCHIP: {
2829 struct kvm_irq_routing_entry routing;
2830
2831 r = -EINVAL;
2832 if (kvm->arch.use_irqchip) {
2833 /* Set up dummy routing. */
2834 memset(&routing, 0, sizeof(routing));
2835 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2836 }
2837 break;
2838 }
2839 case KVM_SET_DEVICE_ATTR: {
2840 r = -EFAULT;
2841 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2842 break;
2843 r = kvm_s390_vm_set_attr(kvm, &attr);
2844 break;
2845 }
2846 case KVM_GET_DEVICE_ATTR: {
2847 r = -EFAULT;
2848 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2849 break;
2850 r = kvm_s390_vm_get_attr(kvm, &attr);
2851 break;
2852 }
2853 case KVM_HAS_DEVICE_ATTR: {
2854 r = -EFAULT;
2855 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2856 break;
2857 r = kvm_s390_vm_has_attr(kvm, &attr);
2858 break;
2859 }
2860 case KVM_S390_GET_SKEYS: {
2861 struct kvm_s390_skeys args;
2862
2863 r = -EFAULT;
2864 if (copy_from_user(&args, argp,
2865 sizeof(struct kvm_s390_skeys)))
2866 break;
2867 r = kvm_s390_get_skeys(kvm, &args);
2868 break;
2869 }
2870 case KVM_S390_SET_SKEYS: {
2871 struct kvm_s390_skeys args;
2872
2873 r = -EFAULT;
2874 if (copy_from_user(&args, argp,
2875 sizeof(struct kvm_s390_skeys)))
2876 break;
2877 r = kvm_s390_set_skeys(kvm, &args);
2878 break;
2879 }
2880 case KVM_S390_GET_CMMA_BITS: {
2881 struct kvm_s390_cmma_log args;
2882
2883 r = -EFAULT;
2884 if (copy_from_user(&args, argp, sizeof(args)))
2885 break;
2886 mutex_lock(&kvm->slots_lock);
2887 r = kvm_s390_get_cmma_bits(kvm, &args);
2888 mutex_unlock(&kvm->slots_lock);
2889 if (!r) {
2890 r = copy_to_user(argp, &args, sizeof(args));
2891 if (r)
2892 r = -EFAULT;
2893 }
2894 break;
2895 }
2896 case KVM_S390_SET_CMMA_BITS: {
2897 struct kvm_s390_cmma_log args;
2898
2899 r = -EFAULT;
2900 if (copy_from_user(&args, argp, sizeof(args)))
2901 break;
2902 mutex_lock(&kvm->slots_lock);
2903 r = kvm_s390_set_cmma_bits(kvm, &args);
2904 mutex_unlock(&kvm->slots_lock);
2905 break;
2906 }
2907 case KVM_S390_PV_COMMAND: {
2908 struct kvm_pv_cmd args;
2909
2910 /* protvirt means user cpu state */
2911 kvm_s390_set_user_cpu_state_ctrl(kvm);
2912 r = 0;
2913 if (!is_prot_virt_host()) {
2914 r = -EINVAL;
2915 break;
2916 }
2917 if (copy_from_user(&args, argp, sizeof(args))) {
2918 r = -EFAULT;
2919 break;
2920 }
2921 if (args.flags) {
2922 r = -EINVAL;
2923 break;
2924 }
2925 mutex_lock(&kvm->lock);
2926 r = kvm_s390_handle_pv(kvm, &args);
2927 mutex_unlock(&kvm->lock);
2928 if (copy_to_user(argp, &args, sizeof(args))) {
2929 r = -EFAULT;
2930 break;
2931 }
2932 break;
2933 }
2934 case KVM_S390_MEM_OP: {
2935 struct kvm_s390_mem_op mem_op;
2936
2937 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2938 r = kvm_s390_vm_mem_op(kvm, &mem_op);
2939 else
2940 r = -EFAULT;
2941 break;
2942 }
2943 case KVM_S390_ZPCI_OP: {
2944 struct kvm_s390_zpci_op args;
2945
2946 r = -EINVAL;
2947 if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
2948 break;
2949 if (copy_from_user(&args, argp, sizeof(args))) {
2950 r = -EFAULT;
2951 break;
2952 }
2953 r = kvm_s390_pci_zpci_op(kvm, &args);
2954 break;
2955 }
2956 default:
2957 r = -ENOTTY;
2958 }
2959
2960 return r;
2961 }
2962
kvm_s390_apxa_installed(void)2963 static int kvm_s390_apxa_installed(void)
2964 {
2965 struct ap_config_info info;
2966
2967 if (ap_instructions_available()) {
2968 if (ap_qci(&info) == 0)
2969 return info.apxa;
2970 }
2971
2972 return 0;
2973 }
2974
2975 /*
2976 * The format of the crypto control block (CRYCB) is specified in the 3 low
2977 * order bits of the CRYCB designation (CRYCBD) field as follows:
2978 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2979 * AP extended addressing (APXA) facility are installed.
2980 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2981 * Format 2: Both the APXA and MSAX3 facilities are installed
2982 */
kvm_s390_set_crycb_format(struct kvm * kvm)2983 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2984 {
2985 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2986
2987 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2988 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2989
2990 /* Check whether MSAX3 is installed */
2991 if (!test_kvm_facility(kvm, 76))
2992 return;
2993
2994 if (kvm_s390_apxa_installed())
2995 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2996 else
2997 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2998 }
2999
3000 /*
3001 * kvm_arch_crypto_set_masks
3002 *
3003 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3004 * to be set.
3005 * @apm: the mask identifying the accessible AP adapters
3006 * @aqm: the mask identifying the accessible AP domains
3007 * @adm: the mask identifying the accessible AP control domains
3008 *
3009 * Set the masks that identify the adapters, domains and control domains to
3010 * which the KVM guest is granted access.
3011 *
3012 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3013 * function.
3014 */
kvm_arch_crypto_set_masks(struct kvm * kvm,unsigned long * apm,unsigned long * aqm,unsigned long * adm)3015 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3016 unsigned long *aqm, unsigned long *adm)
3017 {
3018 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3019
3020 kvm_s390_vcpu_block_all(kvm);
3021
3022 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3023 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
3024 memcpy(crycb->apcb1.apm, apm, 32);
3025 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3026 apm[0], apm[1], apm[2], apm[3]);
3027 memcpy(crycb->apcb1.aqm, aqm, 32);
3028 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3029 aqm[0], aqm[1], aqm[2], aqm[3]);
3030 memcpy(crycb->apcb1.adm, adm, 32);
3031 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3032 adm[0], adm[1], adm[2], adm[3]);
3033 break;
3034 case CRYCB_FORMAT1:
3035 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3036 memcpy(crycb->apcb0.apm, apm, 8);
3037 memcpy(crycb->apcb0.aqm, aqm, 2);
3038 memcpy(crycb->apcb0.adm, adm, 2);
3039 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3040 apm[0], *((unsigned short *)aqm),
3041 *((unsigned short *)adm));
3042 break;
3043 default: /* Can not happen */
3044 break;
3045 }
3046
3047 /* recreate the shadow crycb for each vcpu */
3048 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3049 kvm_s390_vcpu_unblock_all(kvm);
3050 }
3051 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
3052
3053 /*
3054 * kvm_arch_crypto_clear_masks
3055 *
3056 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3057 * to be cleared.
3058 *
3059 * Clear the masks that identify the adapters, domains and control domains to
3060 * which the KVM guest is granted access.
3061 *
3062 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3063 * function.
3064 */
kvm_arch_crypto_clear_masks(struct kvm * kvm)3065 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3066 {
3067 kvm_s390_vcpu_block_all(kvm);
3068
3069 memset(&kvm->arch.crypto.crycb->apcb0, 0,
3070 sizeof(kvm->arch.crypto.crycb->apcb0));
3071 memset(&kvm->arch.crypto.crycb->apcb1, 0,
3072 sizeof(kvm->arch.crypto.crycb->apcb1));
3073
3074 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3075 /* recreate the shadow crycb for each vcpu */
3076 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3077 kvm_s390_vcpu_unblock_all(kvm);
3078 }
3079 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3080
kvm_s390_get_initial_cpuid(void)3081 static u64 kvm_s390_get_initial_cpuid(void)
3082 {
3083 struct cpuid cpuid;
3084
3085 get_cpu_id(&cpuid);
3086 cpuid.version = 0xff;
3087 return *((u64 *) &cpuid);
3088 }
3089
kvm_s390_crypto_init(struct kvm * kvm)3090 static void kvm_s390_crypto_init(struct kvm *kvm)
3091 {
3092 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3093 kvm_s390_set_crycb_format(kvm);
3094 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3095
3096 if (!test_kvm_facility(kvm, 76))
3097 return;
3098
3099 /* Enable AES/DEA protected key functions by default */
3100 kvm->arch.crypto.aes_kw = 1;
3101 kvm->arch.crypto.dea_kw = 1;
3102 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3103 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3104 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3105 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3106 }
3107
sca_dispose(struct kvm * kvm)3108 static void sca_dispose(struct kvm *kvm)
3109 {
3110 if (kvm->arch.use_esca)
3111 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3112 else
3113 free_page((unsigned long)(kvm->arch.sca));
3114 kvm->arch.sca = NULL;
3115 }
3116
kvm_arch_free_vm(struct kvm * kvm)3117 void kvm_arch_free_vm(struct kvm *kvm)
3118 {
3119 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3120 kvm_s390_pci_clear_list(kvm);
3121
3122 __kvm_arch_free_vm(kvm);
3123 }
3124
kvm_arch_init_vm(struct kvm * kvm,unsigned long type)3125 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3126 {
3127 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3128 int i, rc;
3129 char debug_name[16];
3130 static unsigned long sca_offset;
3131
3132 rc = -EINVAL;
3133 #ifdef CONFIG_KVM_S390_UCONTROL
3134 if (type & ~KVM_VM_S390_UCONTROL)
3135 goto out_err;
3136 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3137 goto out_err;
3138 #else
3139 if (type)
3140 goto out_err;
3141 #endif
3142
3143 rc = s390_enable_sie();
3144 if (rc)
3145 goto out_err;
3146
3147 rc = -ENOMEM;
3148
3149 if (!sclp.has_64bscao)
3150 alloc_flags |= GFP_DMA;
3151 rwlock_init(&kvm->arch.sca_lock);
3152 /* start with basic SCA */
3153 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3154 if (!kvm->arch.sca)
3155 goto out_err;
3156 mutex_lock(&kvm_lock);
3157 sca_offset += 16;
3158 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3159 sca_offset = 0;
3160 kvm->arch.sca = (struct bsca_block *)
3161 ((char *) kvm->arch.sca + sca_offset);
3162 mutex_unlock(&kvm_lock);
3163
3164 sprintf(debug_name, "kvm-%u", current->pid);
3165
3166 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3167 if (!kvm->arch.dbf)
3168 goto out_err;
3169
3170 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3171 kvm->arch.sie_page2 =
3172 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3173 if (!kvm->arch.sie_page2)
3174 goto out_err;
3175
3176 kvm->arch.sie_page2->kvm = kvm;
3177 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3178
3179 for (i = 0; i < kvm_s390_fac_size(); i++) {
3180 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3181 (kvm_s390_fac_base[i] |
3182 kvm_s390_fac_ext[i]);
3183 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3184 kvm_s390_fac_base[i];
3185 }
3186 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3187
3188 /* we are always in czam mode - even on pre z14 machines */
3189 set_kvm_facility(kvm->arch.model.fac_mask, 138);
3190 set_kvm_facility(kvm->arch.model.fac_list, 138);
3191 /* we emulate STHYI in kvm */
3192 set_kvm_facility(kvm->arch.model.fac_mask, 74);
3193 set_kvm_facility(kvm->arch.model.fac_list, 74);
3194 if (MACHINE_HAS_TLB_GUEST) {
3195 set_kvm_facility(kvm->arch.model.fac_mask, 147);
3196 set_kvm_facility(kvm->arch.model.fac_list, 147);
3197 }
3198
3199 if (css_general_characteristics.aiv && test_facility(65))
3200 set_kvm_facility(kvm->arch.model.fac_mask, 65);
3201
3202 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3203 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3204
3205 kvm_s390_crypto_init(kvm);
3206
3207 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3208 mutex_lock(&kvm->lock);
3209 kvm_s390_pci_init_list(kvm);
3210 kvm_s390_vcpu_pci_enable_interp(kvm);
3211 mutex_unlock(&kvm->lock);
3212 }
3213
3214 mutex_init(&kvm->arch.float_int.ais_lock);
3215 spin_lock_init(&kvm->arch.float_int.lock);
3216 for (i = 0; i < FIRQ_LIST_COUNT; i++)
3217 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3218 init_waitqueue_head(&kvm->arch.ipte_wq);
3219 mutex_init(&kvm->arch.ipte_mutex);
3220
3221 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3222 VM_EVENT(kvm, 3, "vm created with type %lu", type);
3223
3224 if (type & KVM_VM_S390_UCONTROL) {
3225 kvm->arch.gmap = NULL;
3226 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3227 } else {
3228 if (sclp.hamax == U64_MAX)
3229 kvm->arch.mem_limit = TASK_SIZE_MAX;
3230 else
3231 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3232 sclp.hamax + 1);
3233 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3234 if (!kvm->arch.gmap)
3235 goto out_err;
3236 kvm->arch.gmap->private = kvm;
3237 kvm->arch.gmap->pfault_enabled = 0;
3238 }
3239
3240 kvm->arch.use_pfmfi = sclp.has_pfmfi;
3241 kvm->arch.use_skf = sclp.has_skey;
3242 spin_lock_init(&kvm->arch.start_stop_lock);
3243 kvm_s390_vsie_init(kvm);
3244 if (use_gisa)
3245 kvm_s390_gisa_init(kvm);
3246 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3247
3248 return 0;
3249 out_err:
3250 free_page((unsigned long)kvm->arch.sie_page2);
3251 debug_unregister(kvm->arch.dbf);
3252 sca_dispose(kvm);
3253 KVM_EVENT(3, "creation of vm failed: %d", rc);
3254 return rc;
3255 }
3256
kvm_arch_vcpu_destroy(struct kvm_vcpu * vcpu)3257 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3258 {
3259 u16 rc, rrc;
3260
3261 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3262 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3263 kvm_s390_clear_local_irqs(vcpu);
3264 kvm_clear_async_pf_completion_queue(vcpu);
3265 if (!kvm_is_ucontrol(vcpu->kvm))
3266 sca_del_vcpu(vcpu);
3267 kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3268
3269 if (kvm_is_ucontrol(vcpu->kvm))
3270 gmap_remove(vcpu->arch.gmap);
3271
3272 if (vcpu->kvm->arch.use_cmma)
3273 kvm_s390_vcpu_unsetup_cmma(vcpu);
3274 /* We can not hold the vcpu mutex here, we are already dying */
3275 if (kvm_s390_pv_cpu_get_handle(vcpu))
3276 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3277 free_page((unsigned long)(vcpu->arch.sie_block));
3278 }
3279
kvm_arch_destroy_vm(struct kvm * kvm)3280 void kvm_arch_destroy_vm(struct kvm *kvm)
3281 {
3282 u16 rc, rrc;
3283
3284 kvm_destroy_vcpus(kvm);
3285 sca_dispose(kvm);
3286 kvm_s390_gisa_destroy(kvm);
3287 /*
3288 * We are already at the end of life and kvm->lock is not taken.
3289 * This is ok as the file descriptor is closed by now and nobody
3290 * can mess with the pv state. To avoid lockdep_assert_held from
3291 * complaining we do not use kvm_s390_pv_is_protected.
3292 */
3293 if (kvm_s390_pv_get_handle(kvm))
3294 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
3295 /*
3296 * Remove the mmu notifier only when the whole KVM VM is torn down,
3297 * and only if one was registered to begin with. If the VM is
3298 * currently not protected, but has been previously been protected,
3299 * then it's possible that the notifier is still registered.
3300 */
3301 if (kvm->arch.pv.mmu_notifier.ops)
3302 mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3303
3304 debug_unregister(kvm->arch.dbf);
3305 free_page((unsigned long)kvm->arch.sie_page2);
3306 if (!kvm_is_ucontrol(kvm))
3307 gmap_remove(kvm->arch.gmap);
3308 kvm_s390_destroy_adapters(kvm);
3309 kvm_s390_clear_float_irqs(kvm);
3310 kvm_s390_vsie_destroy(kvm);
3311 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
3312 }
3313
3314 /* Section: vcpu related */
__kvm_ucontrol_vcpu_init(struct kvm_vcpu * vcpu)3315 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3316 {
3317 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3318 if (!vcpu->arch.gmap)
3319 return -ENOMEM;
3320 vcpu->arch.gmap->private = vcpu->kvm;
3321
3322 return 0;
3323 }
3324
sca_del_vcpu(struct kvm_vcpu * vcpu)3325 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3326 {
3327 if (!kvm_s390_use_sca_entries())
3328 return;
3329 read_lock(&vcpu->kvm->arch.sca_lock);
3330 if (vcpu->kvm->arch.use_esca) {
3331 struct esca_block *sca = vcpu->kvm->arch.sca;
3332
3333 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3334 sca->cpu[vcpu->vcpu_id].sda = 0;
3335 } else {
3336 struct bsca_block *sca = vcpu->kvm->arch.sca;
3337
3338 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3339 sca->cpu[vcpu->vcpu_id].sda = 0;
3340 }
3341 read_unlock(&vcpu->kvm->arch.sca_lock);
3342 }
3343
sca_add_vcpu(struct kvm_vcpu * vcpu)3344 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3345 {
3346 if (!kvm_s390_use_sca_entries()) {
3347 struct bsca_block *sca = vcpu->kvm->arch.sca;
3348
3349 /* we still need the basic sca for the ipte control */
3350 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3351 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
3352 return;
3353 }
3354 read_lock(&vcpu->kvm->arch.sca_lock);
3355 if (vcpu->kvm->arch.use_esca) {
3356 struct esca_block *sca = vcpu->kvm->arch.sca;
3357
3358 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3359 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3360 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
3361 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3362 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3363 } else {
3364 struct bsca_block *sca = vcpu->kvm->arch.sca;
3365
3366 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3367 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3368 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
3369 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3370 }
3371 read_unlock(&vcpu->kvm->arch.sca_lock);
3372 }
3373
3374 /* Basic SCA to Extended SCA data copy routines */
sca_copy_entry(struct esca_entry * d,struct bsca_entry * s)3375 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3376 {
3377 d->sda = s->sda;
3378 d->sigp_ctrl.c = s->sigp_ctrl.c;
3379 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3380 }
3381
sca_copy_b_to_e(struct esca_block * d,struct bsca_block * s)3382 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3383 {
3384 int i;
3385
3386 d->ipte_control = s->ipte_control;
3387 d->mcn[0] = s->mcn;
3388 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3389 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3390 }
3391
sca_switch_to_extended(struct kvm * kvm)3392 static int sca_switch_to_extended(struct kvm *kvm)
3393 {
3394 struct bsca_block *old_sca = kvm->arch.sca;
3395 struct esca_block *new_sca;
3396 struct kvm_vcpu *vcpu;
3397 unsigned long vcpu_idx;
3398 u32 scaol, scaoh;
3399
3400 if (kvm->arch.use_esca)
3401 return 0;
3402
3403 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3404 if (!new_sca)
3405 return -ENOMEM;
3406
3407 scaoh = (u32)((u64)(new_sca) >> 32);
3408 scaol = (u32)(u64)(new_sca) & ~0x3fU;
3409
3410 kvm_s390_vcpu_block_all(kvm);
3411 write_lock(&kvm->arch.sca_lock);
3412
3413 sca_copy_b_to_e(new_sca, old_sca);
3414
3415 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3416 vcpu->arch.sie_block->scaoh = scaoh;
3417 vcpu->arch.sie_block->scaol = scaol;
3418 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3419 }
3420 kvm->arch.sca = new_sca;
3421 kvm->arch.use_esca = 1;
3422
3423 write_unlock(&kvm->arch.sca_lock);
3424 kvm_s390_vcpu_unblock_all(kvm);
3425
3426 free_page((unsigned long)old_sca);
3427
3428 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3429 old_sca, kvm->arch.sca);
3430 return 0;
3431 }
3432
sca_can_add_vcpu(struct kvm * kvm,unsigned int id)3433 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3434 {
3435 int rc;
3436
3437 if (!kvm_s390_use_sca_entries()) {
3438 if (id < KVM_MAX_VCPUS)
3439 return true;
3440 return false;
3441 }
3442 if (id < KVM_S390_BSCA_CPU_SLOTS)
3443 return true;
3444 if (!sclp.has_esca || !sclp.has_64bscao)
3445 return false;
3446
3447 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3448
3449 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3450 }
3451
3452 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
__start_cpu_timer_accounting(struct kvm_vcpu * vcpu)3453 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3454 {
3455 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3456 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3457 vcpu->arch.cputm_start = get_tod_clock_fast();
3458 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3459 }
3460
3461 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
__stop_cpu_timer_accounting(struct kvm_vcpu * vcpu)3462 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3463 {
3464 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3465 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3466 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3467 vcpu->arch.cputm_start = 0;
3468 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3469 }
3470
3471 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3472 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3473 {
3474 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3475 vcpu->arch.cputm_enabled = true;
3476 __start_cpu_timer_accounting(vcpu);
3477 }
3478
3479 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3480 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3481 {
3482 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3483 __stop_cpu_timer_accounting(vcpu);
3484 vcpu->arch.cputm_enabled = false;
3485 }
3486
3487 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3488 {
3489 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3490 __enable_cpu_timer_accounting(vcpu);
3491 preempt_enable();
3492 }
3493
3494 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3495 {
3496 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3497 __disable_cpu_timer_accounting(vcpu);
3498 preempt_enable();
3499 }
3500
3501 /* set the cpu timer - may only be called from the VCPU thread itself */
3502 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3503 {
3504 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3505 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3506 if (vcpu->arch.cputm_enabled)
3507 vcpu->arch.cputm_start = get_tod_clock_fast();
3508 vcpu->arch.sie_block->cputm = cputm;
3509 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3510 preempt_enable();
3511 }
3512
3513 /* update and get the cpu timer - can also be called from other VCPU threads */
3514 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3515 {
3516 unsigned int seq;
3517 __u64 value;
3518
3519 if (unlikely(!vcpu->arch.cputm_enabled))
3520 return vcpu->arch.sie_block->cputm;
3521
3522 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3523 do {
3524 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3525 /*
3526 * If the writer would ever execute a read in the critical
3527 * section, e.g. in irq context, we have a deadlock.
3528 */
3529 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3530 value = vcpu->arch.sie_block->cputm;
3531 /* if cputm_start is 0, accounting is being started/stopped */
3532 if (likely(vcpu->arch.cputm_start))
3533 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3534 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3535 preempt_enable();
3536 return value;
3537 }
3538
3539 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3540 {
3541
3542 gmap_enable(vcpu->arch.enabled_gmap);
3543 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3544 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3545 __start_cpu_timer_accounting(vcpu);
3546 vcpu->cpu = cpu;
3547 }
3548
3549 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3550 {
3551 vcpu->cpu = -1;
3552 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3553 __stop_cpu_timer_accounting(vcpu);
3554 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3555 vcpu->arch.enabled_gmap = gmap_get_enabled();
3556 gmap_disable(vcpu->arch.enabled_gmap);
3557
3558 }
3559
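/*
 * Finish vCPU creation after the generic KVM code has made the vCPU
 * visible: copy the VM-wide TOD epoch, attach the guest address space
 * and SCA entry for non-ucontrol guests, and enable the operation
 * exception intercept when facility 74 or user_instr0 is set.
 */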
3560 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3561 {
3562 mutex_lock(&vcpu->kvm->lock);
3563 preempt_disable();
3564 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3565 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3566 preempt_enable();
3567 mutex_unlock(&vcpu->kvm->lock);
3568 if (!kvm_is_ucontrol(vcpu->kvm)) {
3569 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3570 sca_add_vcpu(vcpu);
3571 }
3572 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3573 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3574 /* make vcpu_load load the right gmap on the first trigger */
3575 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3576 }
3577
3578 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3579 {
3580 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3581 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3582 return true;
3583 return false;
3584 }
3585
3586 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3587 {
3588 /* At least one ECC subfunction must be present */
3589 return kvm_has_pckmo_subfunc(kvm, 32) ||
3590 kvm_has_pckmo_subfunc(kvm, 33) ||
3591 kvm_has_pckmo_subfunc(kvm, 34) ||
3592 kvm_has_pckmo_subfunc(kvm, 40) ||
3593 kvm_has_pckmo_subfunc(kvm, 41);
3594
3595 }
3596
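/*
 * Propagate the VM-wide crypto configuration (CRYCB origin, AP
 * interpretation and protected-key wrapping) into this vCPU's SIE
 * control block.
 */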
3597 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3598 {
3599 /*
3600 * If the AP instructions are not being interpreted and the MSAX3
3601 * facility is not configured for the guest, there is nothing to set up.
3602 */
3603 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3604 return;
3605
3606 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3607 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3608 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3609 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3610
3611 if (vcpu->kvm->arch.crypto.apie)
3612 vcpu->arch.sie_block->eca |= ECA_APIE;
3613
3614 /* Set up protected key support */
3615 if (vcpu->kvm->arch.crypto.aes_kw) {
3616 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3617 /* ecc is also wrapped with AES key */
3618 if (kvm_has_pckmo_ecc(vcpu->kvm))
3619 vcpu->arch.sie_block->ecd |= ECD_ECC;
3620 }
3621
3622 if (vcpu->kvm->arch.crypto.dea_kw)
3623 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3624 }
3625
3626 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3627 {
3628 free_page(vcpu->arch.sie_block->cbrlo);
3629 vcpu->arch.sie_block->cbrlo = 0;
3630 }
3631
3632 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3633 {
3634 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3635 if (!vcpu->arch.sie_block->cbrlo)
3636 return -ENOMEM;
3637 return 0;
3638 }
3639
3640 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3641 {
3642 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3643
3644 vcpu->arch.sie_block->ibc = model->ibc;
3645 if (test_kvm_facility(vcpu->kvm, 7))
3646 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3647 }
3648
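/*
 * Apply the facility- and SCLP-dependent execution controls to a
 * freshly created vCPU and, for protected VMs, create the matching
 * ultravisor CPU.
 */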
3649 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3650 {
3651 int rc = 0;
3652 u16 uvrc, uvrrc;
3653
3654 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3655 CPUSTAT_SM |
3656 CPUSTAT_STOPPED);
3657
3658 if (test_kvm_facility(vcpu->kvm, 78))
3659 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3660 else if (test_kvm_facility(vcpu->kvm, 8))
3661 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3662
3663 kvm_s390_vcpu_setup_model(vcpu);
3664
3665 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3666 if (MACHINE_HAS_ESOP)
3667 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3668 if (test_kvm_facility(vcpu->kvm, 9))
3669 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3670 if (test_kvm_facility(vcpu->kvm, 11))
3671 vcpu->arch.sie_block->ecb |= ECB_PTF;
3672 if (test_kvm_facility(vcpu->kvm, 73))
3673 vcpu->arch.sie_block->ecb |= ECB_TE;
3674 if (!kvm_is_ucontrol(vcpu->kvm))
3675 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3676
3677 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3678 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3679 if (test_kvm_facility(vcpu->kvm, 130))
3680 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3681 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3682 if (sclp.has_cei)
3683 vcpu->arch.sie_block->eca |= ECA_CEI;
3684 if (sclp.has_ib)
3685 vcpu->arch.sie_block->eca |= ECA_IB;
3686 if (sclp.has_siif)
3687 vcpu->arch.sie_block->eca |= ECA_SII;
3688 if (sclp.has_sigpif)
3689 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3690 if (test_kvm_facility(vcpu->kvm, 129)) {
3691 vcpu->arch.sie_block->eca |= ECA_VX;
3692 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3693 }
3694 if (test_kvm_facility(vcpu->kvm, 139))
3695 vcpu->arch.sie_block->ecd |= ECD_MEF;
3696 if (test_kvm_facility(vcpu->kvm, 156))
3697 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3698 if (vcpu->arch.sie_block->gd) {
3699 vcpu->arch.sie_block->eca |= ECA_AIV;
3700 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3701 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3702 }
3703 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3704 | SDNXC;
3705 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3706
3707 if (sclp.has_kss)
3708 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3709 else
3710 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3711
3712 if (vcpu->kvm->arch.use_cmma) {
3713 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3714 if (rc)
3715 return rc;
3716 }
3717 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3718 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3719
3720 vcpu->arch.sie_block->hpid = HPID_KVM;
3721
3722 kvm_s390_vcpu_crypto_setup(vcpu);
3723
3724 kvm_s390_vcpu_pci_setup(vcpu);
3725
3726 mutex_lock(&vcpu->kvm->lock);
3727 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3728 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3729 if (rc)
3730 kvm_s390_vcpu_unsetup_cmma(vcpu);
3731 }
3732 mutex_unlock(&vcpu->kvm->lock);
3733
3734 return rc;
3735 }
3736
3737 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3738 {
3739 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3740 return -EINVAL;
3741 return 0;
3742 }
3743
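/*
 * Allocate and initialize the SIE control block and the sync-register
 * layout advertised to userspace, then run the facility-dependent
 * vCPU setup.
 */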
3744 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3745 {
3746 struct sie_page *sie_page;
3747 int rc;
3748
3749 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3750 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3751 if (!sie_page)
3752 return -ENOMEM;
3753
3754 vcpu->arch.sie_block = &sie_page->sie_block;
3755 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3756
3757 /* the real guest size will always be smaller than msl */
3758 vcpu->arch.sie_block->mso = 0;
3759 vcpu->arch.sie_block->msl = sclp.hamax;
3760
3761 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3762 spin_lock_init(&vcpu->arch.local_int.lock);
3763 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3764 seqcount_init(&vcpu->arch.cputm_seqcount);
3765
3766 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3767 kvm_clear_async_pf_completion_queue(vcpu);
3768 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3769 KVM_SYNC_GPRS |
3770 KVM_SYNC_ACRS |
3771 KVM_SYNC_CRS |
3772 KVM_SYNC_ARCH0 |
3773 KVM_SYNC_PFAULT |
3774 KVM_SYNC_DIAG318;
3775 kvm_s390_set_prefix(vcpu, 0);
3776 if (test_kvm_facility(vcpu->kvm, 64))
3777 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3778 if (test_kvm_facility(vcpu->kvm, 82))
3779 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3780 if (test_kvm_facility(vcpu->kvm, 133))
3781 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3782 if (test_kvm_facility(vcpu->kvm, 156))
3783 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3784 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3785 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3786 */
3787 if (MACHINE_HAS_VX)
3788 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3789 else
3790 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3791
3792 if (kvm_is_ucontrol(vcpu->kvm)) {
3793 rc = __kvm_ucontrol_vcpu_init(vcpu);
3794 if (rc)
3795 goto out_free_sie_block;
3796 }
3797
3798 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3799 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3800 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3801
3802 rc = kvm_s390_vcpu_setup(vcpu);
3803 if (rc)
3804 goto out_ucontrol_uninit;
3805
3806 kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3807 return 0;
3808
3809 out_ucontrol_uninit:
3810 if (kvm_is_ucontrol(vcpu->kvm))
3811 gmap_remove(vcpu->arch.gmap);
3812 out_free_sie_block:
3813 free_page((unsigned long)(vcpu->arch.sie_block));
3814 return rc;
3815 }
3816
3817 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3818 {
3819 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3820 return kvm_s390_vcpu_has_irq(vcpu, 0);
3821 }
3822
3823 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3824 {
3825 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3826 }
3827
3828 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3829 {
3830 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3831 exit_sie(vcpu);
3832 }
3833
3834 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3835 {
3836 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3837 }
3838
3839 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3840 {
3841 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3842 exit_sie(vcpu);
3843 }
3844
3845 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3846 {
3847 return atomic_read(&vcpu->arch.sie_block->prog20) &
3848 (PROG_BLOCK_SIE | PROG_REQUEST);
3849 }
3850
3851 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3852 {
3853 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3854 }
3855
3856 /*
3857 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3858 * If the CPU is not running (e.g. waiting as idle) the function will
3859 * return immediately. */
3860 void exit_sie(struct kvm_vcpu *vcpu)
3861 {
3862 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3863 kvm_s390_vsie_kick(vcpu);
3864 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3865 cpu_relax();
3866 }
3867
3868 /* Kick a guest cpu out of SIE to process a request synchronously */
3869 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3870 {
3871 __kvm_make_request(req, vcpu);
3872 kvm_s390_vcpu_request(vcpu);
3873 }
3874
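/*
 * gmap invalidation notifier: if the invalidated range overlaps a
 * vCPU's prefix pages, request a prefix refresh for that vCPU.
 */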
3875 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3876 unsigned long end)
3877 {
3878 struct kvm *kvm = gmap->private;
3879 struct kvm_vcpu *vcpu;
3880 unsigned long prefix;
3881 unsigned long i;
3882
3883 if (gmap_is_shadow(gmap))
3884 return;
3885 if (start >= 1UL << 31)
3886 /* We are only interested in prefix pages */
3887 return;
3888 kvm_for_each_vcpu(i, vcpu, kvm) {
3889 /* match against both prefix pages */
3890 prefix = kvm_s390_get_prefix(vcpu);
3891 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3892 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3893 start, end);
3894 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3895 }
3896 }
3897 }
3898
3899 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3900 {
3901 /* do not poll with more than halt_poll_max_steal percent of steal time */
3902 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3903 READ_ONCE(halt_poll_max_steal)) {
3904 vcpu->stat.halt_no_poll_steal++;
3905 return true;
3906 }
3907 return false;
3908 }
3909
3910 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3911 {
3912 /* kvm common code refers to this, but never calls it */
3913 BUG();
3914 return 0;
3915 }
3916
3917 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3918 struct kvm_one_reg *reg)
3919 {
3920 int r = -EINVAL;
3921
3922 switch (reg->id) {
3923 case KVM_REG_S390_TODPR:
3924 r = put_user(vcpu->arch.sie_block->todpr,
3925 (u32 __user *)reg->addr);
3926 break;
3927 case KVM_REG_S390_EPOCHDIFF:
3928 r = put_user(vcpu->arch.sie_block->epoch,
3929 (u64 __user *)reg->addr);
3930 break;
3931 case KVM_REG_S390_CPU_TIMER:
3932 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3933 (u64 __user *)reg->addr);
3934 break;
3935 case KVM_REG_S390_CLOCK_COMP:
3936 r = put_user(vcpu->arch.sie_block->ckc,
3937 (u64 __user *)reg->addr);
3938 break;
3939 case KVM_REG_S390_PFTOKEN:
3940 r = put_user(vcpu->arch.pfault_token,
3941 (u64 __user *)reg->addr);
3942 break;
3943 case KVM_REG_S390_PFCOMPARE:
3944 r = put_user(vcpu->arch.pfault_compare,
3945 (u64 __user *)reg->addr);
3946 break;
3947 case KVM_REG_S390_PFSELECT:
3948 r = put_user(vcpu->arch.pfault_select,
3949 (u64 __user *)reg->addr);
3950 break;
3951 case KVM_REG_S390_PP:
3952 r = put_user(vcpu->arch.sie_block->pp,
3953 (u64 __user *)reg->addr);
3954 break;
3955 case KVM_REG_S390_GBEA:
3956 r = put_user(vcpu->arch.sie_block->gbea,
3957 (u64 __user *)reg->addr);
3958 break;
3959 default:
3960 break;
3961 }
3962
3963 return r;
3964 }
3965
3966 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3967 struct kvm_one_reg *reg)
3968 {
3969 int r = -EINVAL;
3970 __u64 val;
3971
3972 switch (reg->id) {
3973 case KVM_REG_S390_TODPR:
3974 r = get_user(vcpu->arch.sie_block->todpr,
3975 (u32 __user *)reg->addr);
3976 break;
3977 case KVM_REG_S390_EPOCHDIFF:
3978 r = get_user(vcpu->arch.sie_block->epoch,
3979 (u64 __user *)reg->addr);
3980 break;
3981 case KVM_REG_S390_CPU_TIMER:
3982 r = get_user(val, (u64 __user *)reg->addr);
3983 if (!r)
3984 kvm_s390_set_cpu_timer(vcpu, val);
3985 break;
3986 case KVM_REG_S390_CLOCK_COMP:
3987 r = get_user(vcpu->arch.sie_block->ckc,
3988 (u64 __user *)reg->addr);
3989 break;
3990 case KVM_REG_S390_PFTOKEN:
3991 r = get_user(vcpu->arch.pfault_token,
3992 (u64 __user *)reg->addr);
3993 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3994 kvm_clear_async_pf_completion_queue(vcpu);
3995 break;
3996 case KVM_REG_S390_PFCOMPARE:
3997 r = get_user(vcpu->arch.pfault_compare,
3998 (u64 __user *)reg->addr);
3999 break;
4000 case KVM_REG_S390_PFSELECT:
4001 r = get_user(vcpu->arch.pfault_select,
4002 (u64 __user *)reg->addr);
4003 break;
4004 case KVM_REG_S390_PP:
4005 r = get_user(vcpu->arch.sie_block->pp,
4006 (u64 __user *)reg->addr);
4007 break;
4008 case KVM_REG_S390_GBEA:
4009 r = get_user(vcpu->arch.sie_block->gbea,
4010 (u64 __user *)reg->addr);
4011 break;
4012 default:
4013 break;
4014 }
4015
4016 return r;
4017 }
4018
4019 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4020 {
4021 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4022 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4023 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4024
4025 kvm_clear_async_pf_completion_queue(vcpu);
4026 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4027 kvm_s390_vcpu_stop(vcpu);
4028 kvm_s390_clear_local_irqs(vcpu);
4029 }
4030
4031 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4032 {
4033 /* Initial reset is a superset of the normal reset */
4034 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4035
4036 /*
4037 * This equals initial cpu reset in pop, but we don't switch to ESA.
4038 * We do not only reset the internal data, but also ...
4039 */
4040 vcpu->arch.sie_block->gpsw.mask = 0;
4041 vcpu->arch.sie_block->gpsw.addr = 0;
4042 kvm_s390_set_prefix(vcpu, 0);
4043 kvm_s390_set_cpu_timer(vcpu, 0);
4044 vcpu->arch.sie_block->ckc = 0;
4045 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4046 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4047 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4048
4049 /* ... the data in sync regs */
4050 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4051 vcpu->run->s.regs.ckc = 0;
4052 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4053 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4054 vcpu->run->psw_addr = 0;
4055 vcpu->run->psw_mask = 0;
4056 vcpu->run->s.regs.todpr = 0;
4057 vcpu->run->s.regs.cputm = 0;
4058 vcpu->run->s.regs.ckc = 0;
4059 vcpu->run->s.regs.pp = 0;
4060 vcpu->run->s.regs.gbea = 1;
4061 vcpu->run->s.regs.fpc = 0;
4062 /*
4063 * Do not reset these registers in the protected case, as some of
4064 * them are overlayed and they are not accessible in this case
4065 * anyway.
4066 */
4067 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4068 vcpu->arch.sie_block->gbea = 1;
4069 vcpu->arch.sie_block->pp = 0;
4070 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4071 vcpu->arch.sie_block->todpr = 0;
4072 }
4073 }
4074
4075 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4076 {
4077 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4078
4079 /* Clear reset is a superset of the initial reset */
4080 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4081
4082 memset(&regs->gprs, 0, sizeof(regs->gprs));
4083 memset(&regs->vrs, 0, sizeof(regs->vrs));
4084 memset(&regs->acrs, 0, sizeof(regs->acrs));
4085 memset(&regs->gscb, 0, sizeof(regs->gscb));
4086
4087 regs->etoken = 0;
4088 regs->etoken_extension = 0;
4089 }
4090
4091 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4092 {
4093 vcpu_load(vcpu);
4094 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4095 vcpu_put(vcpu);
4096 return 0;
4097 }
4098
4099 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4100 {
4101 vcpu_load(vcpu);
4102 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4103 vcpu_put(vcpu);
4104 return 0;
4105 }
4106
4107 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4108 struct kvm_sregs *sregs)
4109 {
4110 vcpu_load(vcpu);
4111
4112 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4113 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4114
4115 vcpu_put(vcpu);
4116 return 0;
4117 }
4118
4119 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4120 struct kvm_sregs *sregs)
4121 {
4122 vcpu_load(vcpu);
4123
4124 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4125 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4126
4127 vcpu_put(vcpu);
4128 return 0;
4129 }
4130
4131 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4132 {
4133 int ret = 0;
4134
4135 vcpu_load(vcpu);
4136
4137 if (test_fp_ctl(fpu->fpc)) {
4138 ret = -EINVAL;
4139 goto out;
4140 }
4141 vcpu->run->s.regs.fpc = fpu->fpc;
4142 if (MACHINE_HAS_VX)
4143 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4144 (freg_t *) fpu->fprs);
4145 else
4146 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4147
4148 out:
4149 vcpu_put(vcpu);
4150 return ret;
4151 }
4152
4153 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4154 {
4155 vcpu_load(vcpu);
4156
4157 /* make sure we have the latest values */
4158 save_fpu_regs();
4159 if (MACHINE_HAS_VX)
4160 convert_vx_to_fp((freg_t *) fpu->fprs,
4161 (__vector128 *) vcpu->run->s.regs.vrs);
4162 else
4163 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4164 fpu->fpc = vcpu->run->s.regs.fpc;
4165
4166 vcpu_put(vcpu);
4167 return 0;
4168 }
4169
4170 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4171 {
4172 int rc = 0;
4173
4174 if (!is_vcpu_stopped(vcpu))
4175 rc = -EBUSY;
4176 else {
4177 vcpu->run->psw_mask = psw.mask;
4178 vcpu->run->psw_addr = psw.addr;
4179 }
4180 return rc;
4181 }
4182
4183 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4184 struct kvm_translation *tr)
4185 {
4186 return -EINVAL; /* not implemented yet */
4187 }
4188
4189 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4190 KVM_GUESTDBG_USE_HW_BP | \
4191 KVM_GUESTDBG_ENABLE)
4192
4193 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4194 struct kvm_guest_debug *dbg)
4195 {
4196 int rc = 0;
4197
4198 vcpu_load(vcpu);
4199
4200 vcpu->guest_debug = 0;
4201 kvm_s390_clear_bp_data(vcpu);
4202
4203 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4204 rc = -EINVAL;
4205 goto out;
4206 }
4207 if (!sclp.has_gpere) {
4208 rc = -EINVAL;
4209 goto out;
4210 }
4211
4212 if (dbg->control & KVM_GUESTDBG_ENABLE) {
4213 vcpu->guest_debug = dbg->control;
4214 /* enforce guest PER */
4215 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4216
4217 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4218 rc = kvm_s390_import_bp_data(vcpu, dbg);
4219 } else {
4220 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4221 vcpu->arch.guestdbg.last_bp = 0;
4222 }
4223
4224 if (rc) {
4225 vcpu->guest_debug = 0;
4226 kvm_s390_clear_bp_data(vcpu);
4227 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4228 }
4229
4230 out:
4231 vcpu_put(vcpu);
4232 return rc;
4233 }
4234
4235 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4236 struct kvm_mp_state *mp_state)
4237 {
4238 int ret;
4239
4240 vcpu_load(vcpu);
4241
4242 /* CHECK_STOP and LOAD are not supported yet */
4243 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4244 KVM_MP_STATE_OPERATING;
4245
4246 vcpu_put(vcpu);
4247 return ret;
4248 }
4249
4250 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4251 struct kvm_mp_state *mp_state)
4252 {
4253 int rc = 0;
4254
4255 vcpu_load(vcpu);
4256
4257 /* user space knows about this interface - let it control the state */
4258 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4259
4260 switch (mp_state->mp_state) {
4261 case KVM_MP_STATE_STOPPED:
4262 rc = kvm_s390_vcpu_stop(vcpu);
4263 break;
4264 case KVM_MP_STATE_OPERATING:
4265 rc = kvm_s390_vcpu_start(vcpu);
4266 break;
4267 case KVM_MP_STATE_LOAD:
4268 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4269 rc = -ENXIO;
4270 break;
4271 }
4272 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4273 break;
4274 case KVM_MP_STATE_CHECK_STOP:
4275 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
4276 default:
4277 rc = -ENXIO;
4278 }
4279
4280 vcpu_put(vcpu);
4281 return rc;
4282 }
4283
4284 static bool ibs_enabled(struct kvm_vcpu *vcpu)
4285 {
4286 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4287 }
4288
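/*
 * Process pending vCPU requests before (re-)entering SIE. Most requests
 * only toggle bits in the SIE control block; the prefix refresh re-arms
 * the ipte notifier and may be retried.
 */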
4289 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4290 {
4291 retry:
4292 kvm_s390_vcpu_request_handled(vcpu);
4293 if (!kvm_request_pending(vcpu))
4294 return 0;
4295 /*
4296 * If the guest prefix changed, re-arm the ipte notifier for the
4297 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4298 * This ensures that the ipte instruction for this request has
4299 * already finished. We might race against a second unmapper that
4300 * wants to set the blocking bit. Let's just retry the request loop.
4301 */
4302 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4303 int rc;
4304 rc = gmap_mprotect_notify(vcpu->arch.gmap,
4305 kvm_s390_get_prefix(vcpu),
4306 PAGE_SIZE * 2, PROT_WRITE);
4307 if (rc) {
4308 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4309 return rc;
4310 }
4311 goto retry;
4312 }
4313
4314 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4315 vcpu->arch.sie_block->ihcpu = 0xffff;
4316 goto retry;
4317 }
4318
4319 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4320 if (!ibs_enabled(vcpu)) {
4321 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4322 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4323 }
4324 goto retry;
4325 }
4326
4327 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4328 if (ibs_enabled(vcpu)) {
4329 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4330 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4331 }
4332 goto retry;
4333 }
4334
4335 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4336 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4337 goto retry;
4338 }
4339
4340 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4341 /*
4342 * Disable CMM virtualization; we will emulate the ESSA
4343 * instruction manually, in order to provide additional
4344 * functionalities needed for live migration.
4345 */
4346 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4347 goto retry;
4348 }
4349
4350 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4351 /*
4352 * Re-enable CMM virtualization if CMMA is available and
4353 * CMM has been used.
4354 */
4355 if ((vcpu->kvm->arch.use_cmma) &&
4356 (vcpu->kvm->mm->context.uses_cmm))
4357 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4358 goto retry;
4359 }
4360
4361 /* we left the vsie handler, nothing to do, just clear the request */
4362 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4363
4364 return 0;
4365 }
4366
4367 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4368 {
4369 struct kvm_vcpu *vcpu;
4370 union tod_clock clk;
4371 unsigned long i;
4372
4373 preempt_disable();
4374
4375 store_tod_clock_ext(&clk);
4376
4377 kvm->arch.epoch = gtod->tod - clk.tod;
4378 kvm->arch.epdx = 0;
4379 if (test_kvm_facility(kvm, 139)) {
4380 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4381 if (kvm->arch.epoch > gtod->tod)
4382 kvm->arch.epdx -= 1;
4383 }
4384
4385 kvm_s390_vcpu_block_all(kvm);
4386 kvm_for_each_vcpu(i, vcpu, kvm) {
4387 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4388 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
4389 }
4390
4391 kvm_s390_vcpu_unblock_all(kvm);
4392 preempt_enable();
4393 }
4394
4395 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4396 {
4397 if (!mutex_trylock(&kvm->lock))
4398 return 0;
4399 __kvm_s390_set_tod_clock(kvm, gtod);
4400 mutex_unlock(&kvm->lock);
4401 return 1;
4402 }
4403
4404 /**
4405 * kvm_arch_fault_in_page - fault-in guest page if necessary
4406 * @vcpu: The corresponding virtual cpu
4407 * @gpa: Guest physical address
4408 * @writable: Whether the page should be writable or not
4409 *
4410 * Make sure that a guest page has been faulted-in on the host.
4411 *
4412 * Return: Zero on success, negative error code otherwise.
4413 */
4414 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4415 {
4416 return gmap_fault(vcpu->arch.gmap, gpa,
4417 writable ? FAULT_FLAG_WRITE : 0);
4418 }
4419
4420 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4421 unsigned long token)
4422 {
4423 struct kvm_s390_interrupt inti;
4424 struct kvm_s390_irq irq;
4425
4426 if (start_token) {
4427 irq.u.ext.ext_params2 = token;
4428 irq.type = KVM_S390_INT_PFAULT_INIT;
4429 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4430 } else {
4431 inti.type = KVM_S390_INT_PFAULT_DONE;
4432 inti.parm64 = token;
4433 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4434 }
4435 }
4436
4437 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4438 struct kvm_async_pf *work)
4439 {
4440 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4441 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4442
4443 return true;
4444 }
4445
4446 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4447 struct kvm_async_pf *work)
4448 {
4449 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4450 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4451 }
4452
4453 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4454 struct kvm_async_pf *work)
4455 {
4456 /* s390 will always inject the page directly */
4457 }
4458
4459 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4460 {
4461 /*
4462 * s390 will always inject the page directly,
4463 * but we still want check_async_completion to cleanup
4464 */
4465 return true;
4466 }
4467
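/*
 * Decide whether the current host fault may be handled asynchronously:
 * the guest must have pfault enabled and be able to take the completion
 * interrupt, otherwise the caller falls back to a synchronous fault-in.
 */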
4468 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4469 {
4470 hva_t hva;
4471 struct kvm_arch_async_pf arch;
4472
4473 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4474 return false;
4475 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4476 vcpu->arch.pfault_compare)
4477 return false;
4478 if (psw_extint_disabled(vcpu))
4479 return false;
4480 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4481 return false;
4482 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4483 return false;
4484 if (!vcpu->arch.gmap->pfault_enabled)
4485 return false;
4486
4487 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4488 hva += current->thread.gmap_addr & ~PAGE_MASK;
4489 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4490 return false;
4491
4492 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4493 }
4494
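/*
 * Per-entry housekeeping before jumping into SIE: complete async pfault
 * handling, deliver pending interrupts and requests, and set up guest
 * debugging (PER) if requested by userspace.
 */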
4495 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4496 {
4497 int rc, cpuflags;
4498
4499 /*
4500 * On s390 notifications for arriving pages will be delivered directly
4501 * to the guest but the housekeeping for completed pfaults is
4502 * handled outside the worker.
4503 */
4504 kvm_check_async_pf_completion(vcpu);
4505
4506 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4507 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4508
4509 if (need_resched())
4510 schedule();
4511
4512 if (!kvm_is_ucontrol(vcpu->kvm)) {
4513 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4514 if (rc)
4515 return rc;
4516 }
4517
4518 rc = kvm_s390_handle_requests(vcpu);
4519 if (rc)
4520 return rc;
4521
4522 if (guestdbg_enabled(vcpu)) {
4523 kvm_s390_backup_guest_per_regs(vcpu);
4524 kvm_s390_patch_guest_per_regs(vcpu);
4525 }
4526
4527 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4528
4529 vcpu->arch.sie_block->icptcode = 0;
4530 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4531 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4532 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4533
4534 return 0;
4535 }
4536
4537 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4538 {
4539 struct kvm_s390_pgm_info pgm_info = {
4540 .code = PGM_ADDRESSING,
4541 };
4542 u8 opcode, ilen;
4543 int rc;
4544
4545 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4546 trace_kvm_s390_sie_fault(vcpu);
4547
4548 /*
4549 * We want to inject an addressing exception, which is defined as a
4550 * suppressing or terminating exception. However, since we came here
4551 * by a DAT access exception, the PSW still points to the faulting
4552 * instruction since DAT exceptions are nullifying. So we've got
4553 * to look up the current opcode to get the length of the instruction
4554 * to be able to forward the PSW.
4555 */
4556 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4557 ilen = insn_length(opcode);
4558 if (rc < 0) {
4559 return rc;
4560 } else if (rc) {
4561 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4562 * Forward by arbitrary ilc, injection will take care of
4563 * nullification if necessary.
4564 */
4565 pgm_info = vcpu->arch.pgm;
4566 ilen = 4;
4567 }
4568 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4569 kvm_s390_forward_psw(vcpu, ilen);
4570 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4571 }
4572
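/*
 * Handle the result of a SIE exit: reinject host machine checks, hand
 * intercepts to the intercept handlers, and resolve guest page faults,
 * either asynchronously or by a synchronous fault-in.
 */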
4573 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4574 {
4575 struct mcck_volatile_info *mcck_info;
4576 struct sie_page *sie_page;
4577
4578 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4579 vcpu->arch.sie_block->icptcode);
4580 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4581
4582 if (guestdbg_enabled(vcpu))
4583 kvm_s390_restore_guest_per_regs(vcpu);
4584
4585 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4586 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4587
4588 if (exit_reason == -EINTR) {
4589 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4590 sie_page = container_of(vcpu->arch.sie_block,
4591 struct sie_page, sie_block);
4592 mcck_info = &sie_page->mcck_info;
4593 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4594 return 0;
4595 }
4596
4597 if (vcpu->arch.sie_block->icptcode > 0) {
4598 int rc = kvm_handle_sie_intercept(vcpu);
4599
4600 if (rc != -EOPNOTSUPP)
4601 return rc;
4602 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4603 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4604 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4605 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4606 return -EREMOTE;
4607 } else if (exit_reason != -EFAULT) {
4608 vcpu->stat.exit_null++;
4609 return 0;
4610 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4611 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4612 vcpu->run->s390_ucontrol.trans_exc_code =
4613 current->thread.gmap_addr;
4614 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4615 return -EREMOTE;
4616 } else if (current->thread.gmap_pfault) {
4617 trace_kvm_s390_major_guest_pfault(vcpu);
4618 current->thread.gmap_pfault = 0;
4619 if (kvm_arch_setup_async_pf(vcpu))
4620 return 0;
4621 vcpu->stat.pfault_sync++;
4622 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4623 }
4624 return vcpu_post_run_fault_in_sie(vcpu);
4625 }
4626
4627 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
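/*
 * The inner run loop: enter SIE repeatedly until userspace interaction
 * is required, a signal is pending, or guest debugging wants to exit.
 */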
4628 static int __vcpu_run(struct kvm_vcpu *vcpu)
4629 {
4630 int rc, exit_reason;
4631 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4632
4633 /*
4634 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4635 * ning the guest), so that memslots (and other stuff) are protected
4636 */
4637 kvm_vcpu_srcu_read_lock(vcpu);
4638
4639 do {
4640 rc = vcpu_pre_run(vcpu);
4641 if (rc)
4642 break;
4643
4644 kvm_vcpu_srcu_read_unlock(vcpu);
4645 /*
4646 * As PF_VCPU will be used in fault handler, between
4647 * guest_enter and guest_exit should be no uaccess.
4648 */
4649 local_irq_disable();
4650 guest_enter_irqoff();
4651 __disable_cpu_timer_accounting(vcpu);
4652 local_irq_enable();
4653 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4654 memcpy(sie_page->pv_grregs,
4655 vcpu->run->s.regs.gprs,
4656 sizeof(sie_page->pv_grregs));
4657 }
4658 if (test_cpu_flag(CIF_FPU))
4659 load_fpu_regs();
4660 exit_reason = sie64a(vcpu->arch.sie_block,
4661 vcpu->run->s.regs.gprs);
4662 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4663 memcpy(vcpu->run->s.regs.gprs,
4664 sie_page->pv_grregs,
4665 sizeof(sie_page->pv_grregs));
4666 /*
4667 * We're not allowed to inject interrupts on intercepts
4668 * that leave the guest state in an "in-between" state
4669 * where the next SIE entry will do a continuation.
4670 * Fence interrupts in our "internal" PSW.
4671 */
4672 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4673 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4674 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4675 }
4676 }
4677 local_irq_disable();
4678 __enable_cpu_timer_accounting(vcpu);
4679 guest_exit_irqoff();
4680 local_irq_enable();
4681 kvm_vcpu_srcu_read_lock(vcpu);
4682
4683 rc = vcpu_post_run(vcpu, exit_reason);
4684 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4685
4686 kvm_vcpu_srcu_read_unlock(vcpu);
4687 return rc;
4688 }
4689
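/*
 * Load the sync registers that only exist for non-protected (format 2)
 * guests from kvm_run into the vCPU, enabling RI and GS right away when
 * userspace hands in valid control blocks.
 */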
4690 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4691 {
4692 struct kvm_run *kvm_run = vcpu->run;
4693 struct runtime_instr_cb *riccb;
4694 struct gs_cb *gscb;
4695
4696 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4697 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4698 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4699 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4700 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4701 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4702 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4703 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4704 }
4705 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4706 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4707 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4708 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4709 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4710 kvm_clear_async_pf_completion_queue(vcpu);
4711 }
4712 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4713 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4714 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4715 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4716 }
4717 /*
4718 * If userspace sets the riccb (e.g. after migration) to a valid state,
4719 * we should enable RI here instead of doing the lazy enablement.
4720 */
4721 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4722 test_kvm_facility(vcpu->kvm, 64) &&
4723 riccb->v &&
4724 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4725 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4726 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4727 }
4728 /*
4729 * If userspace sets the gscb (e.g. after migration) to non-zero,
4730 * we should enable GS here instead of doing the lazy enablement.
4731 */
4732 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4733 test_kvm_facility(vcpu->kvm, 133) &&
4734 gscb->gssm &&
4735 !vcpu->arch.gs_enabled) {
4736 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4737 vcpu->arch.sie_block->ecb |= ECB_GS;
4738 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4739 vcpu->arch.gs_enabled = 1;
4740 }
4741 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4742 test_kvm_facility(vcpu->kvm, 82)) {
4743 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4744 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4745 }
4746 if (MACHINE_HAS_GS) {
4747 preempt_disable();
4748 __ctl_set_bit(2, 4);
4749 if (current->thread.gs_cb) {
4750 vcpu->arch.host_gscb = current->thread.gs_cb;
4751 save_gs_cb(vcpu->arch.host_gscb);
4752 }
4753 if (vcpu->arch.gs_enabled) {
4754 current->thread.gs_cb = (struct gs_cb *)
4755 &vcpu->run->s.regs.gscb;
4756 restore_gs_cb(current->thread.gs_cb);
4757 }
4758 preempt_enable();
4759 }
4760 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4761 }
4762
4763 static void sync_regs(struct kvm_vcpu *vcpu)
4764 {
4765 struct kvm_run *kvm_run = vcpu->run;
4766
4767 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4768 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4769 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4770 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4771 /* some control register changes require a tlb flush */
4772 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4773 }
4774 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4775 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4776 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4777 }
4778 save_access_regs(vcpu->arch.host_acrs);
4779 restore_access_regs(vcpu->run->s.regs.acrs);
4780 /* save host (userspace) fprs/vrs */
4781 save_fpu_regs();
4782 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4783 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4784 if (MACHINE_HAS_VX)
4785 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4786 else
4787 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4788 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4789 if (test_fp_ctl(current->thread.fpu.fpc))
4790 /* User space provided an invalid FPC, let's clear it */
4791 current->thread.fpu.fpc = 0;
4792
4793 /* Sync fmt2 only data */
4794 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4795 sync_regs_fmt2(vcpu);
4796 } else {
4797 /*
4798 * In several places we have to modify our internal view to
4799 * not do things that are disallowed by the ultravisor. For
4800 * example we must not inject interrupts after specific exits
4801 * (e.g. 112 prefix page not secure). We do this by turning
4802 * off the machine check, external and I/O interrupt bits
4803 * of our PSW copy. To avoid getting validity intercepts, we
4804 * do only accept the condition code from userspace.
4805 */
4806 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4807 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4808 PSW_MASK_CC;
4809 }
4810
4811 kvm_run->kvm_dirty_regs = 0;
4812 }
4813
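/*
 * Store the format-2 only register state back into kvm_run after
 * leaving the run loop; the GS control block is swapped back to the
 * host's copy here as well.
 */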
4814 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4815 {
4816 struct kvm_run *kvm_run = vcpu->run;
4817
4818 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4819 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4820 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4821 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4822 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4823 if (MACHINE_HAS_GS) {
4824 preempt_disable();
4825 __ctl_set_bit(2, 4);
4826 if (vcpu->arch.gs_enabled)
4827 save_gs_cb(current->thread.gs_cb);
4828 current->thread.gs_cb = vcpu->arch.host_gscb;
4829 restore_gs_cb(vcpu->arch.host_gscb);
4830 if (!vcpu->arch.host_gscb)
4831 __ctl_clear_bit(2, 4);
4832 vcpu->arch.host_gscb = NULL;
4833 preempt_enable();
4834 }
4835 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4836 }
4837
4838 static void store_regs(struct kvm_vcpu *vcpu)
4839 {
4840 struct kvm_run *kvm_run = vcpu->run;
4841
4842 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4843 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4844 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4845 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4846 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4847 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4848 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4849 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4850 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4851 save_access_regs(vcpu->run->s.regs.acrs);
4852 restore_access_regs(vcpu->arch.host_acrs);
4853 /* Save guest register state */
4854 save_fpu_regs();
4855 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4856 /* Restore will be done lazily at return */
4857 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4858 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4859 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4860 store_regs_fmt2(vcpu);
4861 }
4862
4863 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4864 {
4865 struct kvm_run *kvm_run = vcpu->run;
4866 int rc;
4867
4868 /*
4869 * Running a VM while dumping always has the potential to
4870 * produce inconsistent dump data. But for PV vcpus a SIE
4871 * entry while dumping could also lead to a fatal validity
4872 * intercept which we absolutely want to avoid.
4873 */
4874 if (vcpu->kvm->arch.pv.dumping)
4875 return -EINVAL;
4876
4877 if (kvm_run->immediate_exit)
4878 return -EINTR;
4879
4880 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4881 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4882 return -EINVAL;
4883
4884 vcpu_load(vcpu);
4885
4886 if (guestdbg_exit_pending(vcpu)) {
4887 kvm_s390_prepare_debug_exit(vcpu);
4888 rc = 0;
4889 goto out;
4890 }
4891
4892 kvm_sigset_activate(vcpu);
4893
4894 /*
4895 * no need to check the return value of vcpu_start as it can only have
4896 * an error for protvirt, but protvirt means user cpu state
4897 */
4898 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4899 kvm_s390_vcpu_start(vcpu);
4900 } else if (is_vcpu_stopped(vcpu)) {
4901 pr_err_ratelimited("can't run stopped vcpu %d\n",
4902 vcpu->vcpu_id);
4903 rc = -EINVAL;
4904 goto out;
4905 }
4906
4907 sync_regs(vcpu);
4908 enable_cpu_timer_accounting(vcpu);
4909
4910 might_fault();
4911 rc = __vcpu_run(vcpu);
4912
4913 if (signal_pending(current) && !rc) {
4914 kvm_run->exit_reason = KVM_EXIT_INTR;
4915 rc = -EINTR;
4916 }
4917
4918 if (guestdbg_exit_pending(vcpu) && !rc) {
4919 kvm_s390_prepare_debug_exit(vcpu);
4920 rc = 0;
4921 }
4922
4923 if (rc == -EREMOTE) {
4924 /* userspace support is needed, kvm_run has been prepared */
4925 rc = 0;
4926 }
4927
4928 disable_cpu_timer_accounting(vcpu);
4929 store_regs(vcpu);
4930
4931 kvm_sigset_deactivate(vcpu);
4932
4933 vcpu->stat.exit_userspace++;
4934 out:
4935 vcpu_put(vcpu);
4936 return rc;
4937 }
4938
4939 /*
4940 * store status at address
4941 * we have two special cases:
4942 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4943 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4944 */
4945 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4946 {
4947 unsigned char archmode = 1;
4948 freg_t fprs[NUM_FPRS];
4949 unsigned int px;
4950 u64 clkcomp, cputm;
4951 int rc;
4952
4953 px = kvm_s390_get_prefix(vcpu);
4954 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4955 if (write_guest_abs(vcpu, 163, &archmode, 1))
4956 return -EFAULT;
4957 gpa = 0;
4958 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4959 if (write_guest_real(vcpu, 163, &archmode, 1))
4960 return -EFAULT;
4961 gpa = px;
4962 } else
4963 gpa -= __LC_FPREGS_SAVE_AREA;
4964
4965 /* manually convert vector registers if necessary */
4966 if (MACHINE_HAS_VX) {
4967 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4968 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4969 fprs, 128);
4970 } else {
4971 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4972 vcpu->run->s.regs.fprs, 128);
4973 }
4974 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4975 vcpu->run->s.regs.gprs, 128);
4976 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4977 &vcpu->arch.sie_block->gpsw, 16);
4978 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4979 &px, 4);
4980 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4981 &vcpu->run->s.regs.fpc, 4);
4982 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4983 &vcpu->arch.sie_block->todpr, 4);
4984 cputm = kvm_s390_get_cpu_timer(vcpu);
4985 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4986 &cputm, 8);
4987 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4988 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4989 &clkcomp, 8);
4990 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4991 &vcpu->run->s.regs.acrs, 64);
4992 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4993 &vcpu->arch.sie_block->gcr, 128);
4994 return rc ? -EFAULT : 0;
4995 }
4996
4997 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4998 {
4999 /*
5000 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
5001 * switch in the run ioctl. Let's update our copies before we save
5002 * it into the save area
5003 */
5004 save_fpu_regs();
5005 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
5006 save_access_regs(vcpu->run->s.regs.acrs);
5007
5008 return kvm_s390_store_status_unloaded(vcpu, addr);
5009 }
5010
5011 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5012 {
5013 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5014 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5015 }
5016
5017 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5018 {
5019 unsigned long i;
5020 struct kvm_vcpu *vcpu;
5021
5022 kvm_for_each_vcpu(i, vcpu, kvm) {
5023 __disable_ibs_on_vcpu(vcpu);
5024 }
5025 }
5026
5027 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5028 {
5029 if (!sclp.has_ibs)
5030 return;
5031 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5032 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5033 }
5034
5035 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5036 {
5037 int i, online_vcpus, r = 0, started_vcpus = 0;
5038
5039 if (!is_vcpu_stopped(vcpu))
5040 return 0;
5041
5042 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5043 /* Only one cpu at a time may enter/leave the STOPPED state. */
5044 spin_lock(&vcpu->kvm->arch.start_stop_lock);
5045 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5046
5047 /* Let's tell the UV that we want to change into the operating state */
5048 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5049 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5050 if (r) {
5051 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5052 return r;
5053 }
5054 }
5055
5056 for (i = 0; i < online_vcpus; i++) {
5057 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5058 started_vcpus++;
5059 }
5060
5061 if (started_vcpus == 0) {
5062 /* we're the only active VCPU -> speed it up */
5063 __enable_ibs_on_vcpu(vcpu);
5064 } else if (started_vcpus == 1) {
5065 /*
5066 * As we are starting a second VCPU, we have to disable
5067 * the IBS facility on all VCPUs to remove potentially
5068 * outstanding ENABLE requests.
5069 */
5070 __disable_ibs_on_all_vcpus(vcpu->kvm);
5071 }
5072
5073 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5074 /*
5075 * The real PSW might have changed due to a RESTART interpreted by the
5076 * ultravisor. We block all interrupts and let the next sie exit
5077 * refresh our view.
5078 */
5079 if (kvm_s390_pv_cpu_is_protected(vcpu))
5080 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5081 /*
5082 * Another VCPU might have used IBS while we were offline.
5083 * Let's play safe and flush the VCPU at startup.
5084 */
5085 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5086 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5087 return 0;
5088 }
5089
5090 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5091 {
5092 int i, online_vcpus, r = 0, started_vcpus = 0;
5093 struct kvm_vcpu *started_vcpu = NULL;
5094
5095 if (is_vcpu_stopped(vcpu))
5096 return 0;
5097
5098 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5099 /* Only one cpu at a time may enter/leave the STOPPED state. */
5100 spin_lock(&vcpu->kvm->arch.start_stop_lock);
5101 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5102
5103 /* Let's tell the UV that we want to change into the stopped state */
5104 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5105 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5106 if (r) {
5107 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5108 return r;
5109 }
5110 }
5111
5112 /*
5113 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5114 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5115 * have been fully processed. This will ensure that the VCPU
5116 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5117 */
5118 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5119 kvm_s390_clear_stop_irq(vcpu);
5120
5121 __disable_ibs_on_vcpu(vcpu);
5122
5123 for (i = 0; i < online_vcpus; i++) {
5124 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5125
5126 if (!is_vcpu_stopped(tmp)) {
5127 started_vcpus++;
5128 started_vcpu = tmp;
5129 }
5130 }
5131
5132 if (started_vcpus == 1) {
5133 /*
5134 * As we only have one VCPU left, we want to enable the
5135 * IBS facility for that VCPU to speed it up.
5136 */
5137 __enable_ibs_on_vcpu(started_vcpu);
5138 }
5139
5140 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5141 return 0;
5142 }
5143
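/*
 * Handle KVM_ENABLE_CAP on the VCPU fd. Only KVM_CAP_S390_CSS_SUPPORT,
 * with no flags, is accepted here.
 */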
5144 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5145 struct kvm_enable_cap *cap)
5146 {
5147 int r;
5148
5149 if (cap->flags)
5150 return -EINVAL;
5151
5152 switch (cap->cap) {
5153 case KVM_CAP_S390_CSS_SUPPORT:
5154 if (!vcpu->kvm->arch.css_support) {
5155 vcpu->kvm->arch.css_support = 1;
5156 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5157 trace_kvm_s390_enable_css(vcpu->kvm);
5158 }
5159 r = 0;
5160 break;
5161 default:
5162 r = -EINVAL;
5163 break;
5164 }
5165 return r;
5166 }
5167
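/*
 * KVM_S390_MEM_OP access to the SIDA of a protected VCPU. The offset/size
 * pair must fit into the SIDA and no flags are allowed for these
 * sub-operations.
 */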
5168 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5169 struct kvm_s390_mem_op *mop)
5170 {
5171 void __user *uaddr = (void __user *)mop->buf;
5172 int r = 0;
5173
5174 if (mop->flags || !mop->size)
5175 return -EINVAL;
5176 if (mop->size + mop->sida_offset < mop->size)
5177 return -EINVAL;
5178 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5179 return -E2BIG;
5180 if (!kvm_s390_pv_cpu_is_protected(vcpu))
5181 return -EINVAL;
5182
5183 switch (mop->op) {
5184 case KVM_S390_MEMOP_SIDA_READ:
5185 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
5186 mop->sida_offset), mop->size))
5187 r = -EFAULT;
5188
5189 break;
5190 case KVM_S390_MEMOP_SIDA_WRITE:
5191 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
5192 mop->sida_offset), uaddr, mop->size))
5193 r = -EFAULT;
5194 break;
5195 }
5196 return r;
5197 }
5198
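/*
 * KVM_S390_MEM_OP logical read/write of guest memory, optionally with
 * storage key checking, as a check-only access probe, or with injection
 * of the resulting program exception on failure. Not available while the
 * VCPU is protected. A typical caller fills struct kvm_s390_mem_op with
 * op, gaddr, size, buf and ar and issues ioctl(vcpu_fd, KVM_S390_MEM_OP, ...).
 */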
5199 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5200 struct kvm_s390_mem_op *mop)
5201 {
5202 void __user *uaddr = (void __user *)mop->buf;
5203 void *tmpbuf = NULL;
5204 int r = 0;
5205 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
5206 | KVM_S390_MEMOP_F_CHECK_ONLY
5207 | KVM_S390_MEMOP_F_SKEY_PROTECTION;
5208
5209 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
5210 return -EINVAL;
5211 if (mop->size > MEM_OP_MAX_SIZE)
5212 return -E2BIG;
5213 if (kvm_s390_pv_cpu_is_protected(vcpu))
5214 return -EINVAL;
5215 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
5216 if (access_key_invalid(mop->key))
5217 return -EINVAL;
5218 } else {
5219 mop->key = 0;
5220 }
5221 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5222 tmpbuf = vmalloc(mop->size);
5223 if (!tmpbuf)
5224 return -ENOMEM;
5225 }
5226
5227 switch (mop->op) {
5228 case KVM_S390_MEMOP_LOGICAL_READ:
5229 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5230 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5231 GACC_FETCH, mop->key);
5232 break;
5233 }
5234 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5235 mop->size, mop->key);
5236 if (r == 0) {
5237 if (copy_to_user(uaddr, tmpbuf, mop->size))
5238 r = -EFAULT;
5239 }
5240 break;
5241 case KVM_S390_MEMOP_LOGICAL_WRITE:
5242 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5243 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5244 GACC_STORE, mop->key);
5245 break;
5246 }
5247 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5248 r = -EFAULT;
5249 break;
5250 }
5251 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5252 mop->size, mop->key);
5253 break;
5254 }
5255
5256 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5257 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5258
5259 vfree(tmpbuf);
5260 return r;
5261 }
5262
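/*
 * Dispatch KVM_S390_MEM_OP sub-operations to the memory or SIDA handler
 * while holding the kvm->srcu read lock.
 */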
5263 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5264 struct kvm_s390_mem_op *mop)
5265 {
5266 int r, srcu_idx;
5267
5268 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5269
5270 switch (mop->op) {
5271 case KVM_S390_MEMOP_LOGICAL_READ:
5272 case KVM_S390_MEMOP_LOGICAL_WRITE:
5273 r = kvm_s390_vcpu_mem_op(vcpu, mop);
5274 break;
5275 case KVM_S390_MEMOP_SIDA_READ:
5276 case KVM_S390_MEMOP_SIDA_WRITE:
5277 /* holding vcpu->mutex protects us against the SIDA going away */
5278 r = kvm_s390_vcpu_sida_op(vcpu, mop);
5279 break;
5280 default:
5281 r = -EINVAL;
5282 }
5283
5284 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5285 return r;
5286 }
5287
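/*
 * VCPU ioctls that are handled without loading the VCPU: interrupt
 * injection via KVM_S390_IRQ and KVM_S390_INTERRUPT.
 */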
5288 long kvm_arch_vcpu_async_ioctl(struct file *filp,
5289 unsigned int ioctl, unsigned long arg)
5290 {
5291 struct kvm_vcpu *vcpu = filp->private_data;
5292 void __user *argp = (void __user *)arg;
5293
5294 switch (ioctl) {
5295 case KVM_S390_IRQ: {
5296 struct kvm_s390_irq s390irq;
5297
5298 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5299 return -EFAULT;
5300 return kvm_s390_inject_vcpu(vcpu, &s390irq);
5301 }
5302 case KVM_S390_INTERRUPT: {
5303 struct kvm_s390_interrupt s390int;
5304 struct kvm_s390_irq s390irq = {};
5305
5306 if (copy_from_user(&s390int, argp, sizeof(s390int)))
5307 return -EFAULT;
5308 if (s390int_to_s390irq(&s390int, &s390irq))
5309 return -EINVAL;
5310 return kvm_s390_inject_vcpu(vcpu, &s390irq);
5311 }
5312 }
5313 return -ENOIOCTLCMD;
5314 }
5315
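/*
 * Handle KVM_PV_DUMP_CPU: let the Ultravisor dump the state of this
 * protected VCPU into a kernel buffer and copy the result to user space.
 * Dump mode must have been initialized and the user buffer must be
 * exactly uv_info.guest_cpu_stor_len bytes.
 */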
5316 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5317 struct kvm_pv_cmd *cmd)
5318 {
5319 struct kvm_s390_pv_dmp dmp;
5320 void *data;
5321 int ret;
5322
5323 /* Dump initialization is a prerequisite */
5324 if (!vcpu->kvm->arch.pv.dumping)
5325 return -EINVAL;
5326
5327 if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5328 return -EFAULT;
5329
5330 /* We only handle this subcmd right now */
5331 if (dmp.subcmd != KVM_PV_DUMP_CPU)
5332 return -EINVAL;
5333
5334 /* The dump buffer length must match the CPU storage donated at CPU creation. */
5335 if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5336 return -EINVAL;
5337
5338 data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5339 if (!data)
5340 return -ENOMEM;
5341
5342 ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5343
5344 VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5345 vcpu->vcpu_id, cmd->rc, cmd->rrc);
5346
5347 if (ret)
5348 ret = -EINVAL;
5349
5350 /* On success copy over the dump data */
5351 if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5352 ret = -EFAULT;
5353
5354 kvfree(data);
5355 return ret;
5356 }
5357
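/*
 * Dispatcher for all remaining VCPU ioctls; the VCPU is loaded for the
 * duration of the call.
 */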
5358 long kvm_arch_vcpu_ioctl(struct file *filp,
5359 unsigned int ioctl, unsigned long arg)
5360 {
5361 struct kvm_vcpu *vcpu = filp->private_data;
5362 void __user *argp = (void __user *)arg;
5363 int idx;
5364 long r;
5365 u16 rc, rrc;
5366
5367 vcpu_load(vcpu);
5368
5369 switch (ioctl) {
5370 case KVM_S390_STORE_STATUS:
5371 idx = srcu_read_lock(&vcpu->kvm->srcu);
5372 r = kvm_s390_store_status_unloaded(vcpu, arg);
5373 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5374 break;
5375 case KVM_S390_SET_INITIAL_PSW: {
5376 psw_t psw;
5377
5378 r = -EFAULT;
5379 if (copy_from_user(&psw, argp, sizeof(psw)))
5380 break;
5381 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5382 break;
5383 }
5384 case KVM_S390_CLEAR_RESET:
5385 r = 0;
5386 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5387 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5388 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5389 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5390 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5391 rc, rrc);
5392 }
5393 break;
5394 case KVM_S390_INITIAL_RESET:
5395 r = 0;
5396 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5397 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5398 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5399 UVC_CMD_CPU_RESET_INITIAL,
5400 &rc, &rrc);
5401 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5402 rc, rrc);
5403 }
5404 break;
5405 case KVM_S390_NORMAL_RESET:
5406 r = 0;
5407 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5408 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5409 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5410 UVC_CMD_CPU_RESET, &rc, &rrc);
5411 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5412 rc, rrc);
5413 }
5414 break;
5415 case KVM_SET_ONE_REG:
5416 case KVM_GET_ONE_REG: {
5417 struct kvm_one_reg reg;
5418 r = -EINVAL;
5419 if (kvm_s390_pv_cpu_is_protected(vcpu))
5420 break;
5421 r = -EFAULT;
5422 if (copy_from_user(&reg, argp, sizeof(reg)))
5423 break;
5424 if (ioctl == KVM_SET_ONE_REG)
5425 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5426 else
5427 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5428 break;
5429 }
5430 #ifdef CONFIG_KVM_S390_UCONTROL
5431 case KVM_S390_UCAS_MAP: {
5432 struct kvm_s390_ucas_mapping ucasmap;
5433
5434 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5435 r = -EFAULT;
5436 break;
5437 }
5438
5439 if (!kvm_is_ucontrol(vcpu->kvm)) {
5440 r = -EINVAL;
5441 break;
5442 }
5443
5444 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5445 ucasmap.vcpu_addr, ucasmap.length);
5446 break;
5447 }
5448 case KVM_S390_UCAS_UNMAP: {
5449 struct kvm_s390_ucas_mapping ucasmap;
5450
5451 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5452 r = -EFAULT;
5453 break;
5454 }
5455
5456 if (!kvm_is_ucontrol(vcpu->kvm)) {
5457 r = -EINVAL;
5458 break;
5459 }
5460
5461 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5462 ucasmap.length);
5463 break;
5464 }
5465 #endif
5466 case KVM_S390_VCPU_FAULT: {
5467 r = gmap_fault(vcpu->arch.gmap, arg, 0);
5468 break;
5469 }
5470 case KVM_ENABLE_CAP:
5471 {
5472 struct kvm_enable_cap cap;
5473 r = -EFAULT;
5474 if (copy_from_user(&cap, argp, sizeof(cap)))
5475 break;
5476 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5477 break;
5478 }
5479 case KVM_S390_MEM_OP: {
5480 struct kvm_s390_mem_op mem_op;
5481
5482 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5483 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5484 else
5485 r = -EFAULT;
5486 break;
5487 }
5488 case KVM_S390_SET_IRQ_STATE: {
5489 struct kvm_s390_irq_state irq_state;
5490
5491 r = -EFAULT;
5492 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5493 break;
5494 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5495 irq_state.len == 0 ||
5496 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5497 r = -EINVAL;
5498 break;
5499 }
5500 /* do not use irq_state.flags, it will break old QEMUs */
5501 r = kvm_s390_set_irq_state(vcpu,
5502 (void __user *) irq_state.buf,
5503 irq_state.len);
5504 break;
5505 }
5506 case KVM_S390_GET_IRQ_STATE: {
5507 struct kvm_s390_irq_state irq_state;
5508
5509 r = -EFAULT;
5510 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5511 break;
5512 if (irq_state.len == 0) {
5513 r = -EINVAL;
5514 break;
5515 }
5516 /* do not use irq_state.flags, it will break old QEMUs */
5517 r = kvm_s390_get_irq_state(vcpu,
5518 (__u8 __user *) irq_state.buf,
5519 irq_state.len);
5520 break;
5521 }
5522 case KVM_S390_PV_CPU_COMMAND: {
5523 struct kvm_pv_cmd cmd;
5524
5525 r = -EINVAL;
5526 if (!is_prot_virt_host())
5527 break;
5528
5529 r = -EFAULT;
5530 if (copy_from_user(&cmd, argp, sizeof(cmd)))
5531 break;
5532
5533 r = -EINVAL;
5534 if (cmd.flags)
5535 break;
5536
5537 /* We only handle this cmd right now */
5538 if (cmd.cmd != KVM_PV_DUMP)
5539 break;
5540
5541 r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5542
5543 /* Always copy over UV rc / rrc data */
5544 if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5545 sizeof(cmd.rc) + sizeof(cmd.rrc)))
5546 r = -EFAULT;
5547 break;
5548 }
5549 default:
5550 r = -ENOTTY;
5551 }
5552
5553 vcpu_put(vcpu);
5554 return r;
5555 }
5556
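/*
 * For user-controlled VMs, a fault on the VCPU mapping at
 * KVM_S390_SIE_PAGE_OFFSET exposes the SIE control block; any other
 * access gets SIGBUS.
 */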
5557 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5558 {
5559 #ifdef CONFIG_KVM_S390_UCONTROL
5560 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5561 && (kvm_is_ucontrol(vcpu->kvm))) {
5562 vmf->page = virt_to_page(vcpu->arch.sie_block);
5563 get_page(vmf->page);
5564 return 0;
5565 }
5566 #endif
5567 return VM_FAULT_SIGBUS;
5568 }
5569
5570 /* Section: memory related */
5571 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5572 const struct kvm_memory_slot *old,
5573 struct kvm_memory_slot *new,
5574 enum kvm_mr_change change)
5575 {
5576 gpa_t size;
5577
5578 /* When we are protected, we should not change the memory slots */
5579 if (kvm_s390_pv_get_handle(kvm))
5580 return -EINVAL;
5581
5582 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5583 return 0;
5584
5585 /* A few sanity checks. Memory slots must start and end on a segment
5586 boundary (1 MB). The userland memory backing a slot may be fragmented
5587 into any number of vmas, and it is fine to mmap() and munmap() ranges
5588 within the slot at any time after this call. */
5589
5590 if (new->userspace_addr & 0xffffful)
5591 return -EINVAL;
5592
5593 size = new->npages * PAGE_SIZE;
5594 if (size & 0xffffful)
5595 return -EINVAL;
5596
5597 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5598 return -EINVAL;
5599
5600 return 0;
5601 }
5602
5603 void kvm_arch_commit_memory_region(struct kvm *kvm,
5604 struct kvm_memory_slot *old,
5605 const struct kvm_memory_slot *new,
5606 enum kvm_mr_change change)
5607 {
5608 int rc = 0;
5609
5610 switch (change) {
5611 case KVM_MR_DELETE:
5612 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5613 old->npages * PAGE_SIZE);
5614 break;
5615 case KVM_MR_MOVE:
5616 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5617 old->npages * PAGE_SIZE);
5618 if (rc)
5619 break;
5620 fallthrough;
5621 case KVM_MR_CREATE:
5622 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5623 new->base_gfn * PAGE_SIZE,
5624 new->npages * PAGE_SIZE);
5625 break;
5626 case KVM_MR_FLAGS_ONLY:
5627 break;
5628 default:
5629 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5630 }
5631 if (rc)
5632 pr_warn("failed to commit memory region\n");
5633 return;
5634 }
5635
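/*
 * Per-word mask derived from the SCLP hmfai field; it is used below to
 * limit which facility bits kvm_s390_fac_base takes over from the host
 * facility list.
 */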
5636 static inline unsigned long nonhyp_mask(int i)
5637 {
5638 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5639
5640 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5641 }
5642
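/*
 * Module init: refuse to load when SIE is unavailable or when nested
 * virtualization and huge page backing are both requested, seed the
 * facility base list from the host facility list, then register with
 * the KVM core.
 */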
5643 static int __init kvm_s390_init(void)
5644 {
5645 int i;
5646
5647 if (!sclp.has_sief2) {
5648 pr_info("SIE is not available\n");
5649 return -ENODEV;
5650 }
5651
5652 if (nested && hpage) {
5653 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5654 return -EINVAL;
5655 }
5656
5657 for (i = 0; i < 16; i++)
5658 kvm_s390_fac_base[i] |=
5659 stfle_fac_list[i] & nonhyp_mask(i);
5660
5661 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5662 }
5663
5664 static void __exit kvm_s390_exit(void)
5665 {
5666 kvm_exit();
5667 }
5668
5669 module_init(kvm_s390_init);
5670 module_exit(kvm_s390_exit);
5671
5672 /*
5673 * Enable autoloading of the kvm module.
5674 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5675 * since x86 takes a different approach.
5676 */
5677 #include <linux/miscdevice.h>
5678 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5679 MODULE_ALIAS("devname:kvm");
5680