1 // SPDX-License-Identifier: GPL-2.0-only
2 /******************************************************************************
3 * emulate.c
4 *
5 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6 *
7 * Copyright (c) 2005 Keir Fraser
8 *
9 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
10 * privileged instructions:
11 *
12 * Copyright (C) 2006 Qumranet
13 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
14 *
15 * Avi Kivity <avi@qumranet.com>
16 * Yaniv Kamay <yaniv@qumranet.com>
17 *
18 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
19 */
20
21 #include <linux/kvm_host.h>
22 #include "kvm_cache_regs.h"
23 #include "kvm_emulate.h"
24 #include <linux/stringify.h>
25 #include <asm/debugreg.h>
26 #include <asm/nospec-branch.h>
27 #include <asm/ibt.h>
28
29 #include "x86.h"
30 #include "tss.h"
31 #include "mmu.h"
32 #include "pmu.h"
33
34 /*
35 * Operand types
36 */
37 #define OpNone 0ull
38 #define OpImplicit 1ull /* No generic decode */
39 #define OpReg 2ull /* Register */
40 #define OpMem 3ull /* Memory */
41 #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
42 #define OpDI 5ull /* ES:DI/EDI/RDI */
43 #define OpMem64 6ull /* Memory, 64-bit */
44 #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
45 #define OpDX 8ull /* DX register */
46 #define OpCL 9ull /* CL register (for shifts) */
47 #define OpImmByte 10ull /* 8-bit sign extended immediate */
48 #define OpOne 11ull /* Implied 1 */
49 #define OpImm 12ull /* Sign extended up to 32-bit immediate */
50 #define OpMem16 13ull /* Memory operand (16-bit). */
51 #define OpMem32 14ull /* Memory operand (32-bit). */
52 #define OpImmU 15ull /* Immediate operand, zero extended */
53 #define OpSI 16ull /* SI/ESI/RSI */
54 #define OpImmFAddr 17ull /* Immediate far address */
55 #define OpMemFAddr 18ull /* Far address in memory */
56 #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
57 #define OpES 20ull /* ES */
58 #define OpCS 21ull /* CS */
59 #define OpSS 22ull /* SS */
60 #define OpDS 23ull /* DS */
61 #define OpFS 24ull /* FS */
62 #define OpGS 25ull /* GS */
63 #define OpMem8 26ull /* 8-bit zero extended memory operand */
64 #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
65 #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
66 #define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
67 #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
68
69 #define OpBits 5 /* Width of operand field */
70 #define OpMask ((1ull << OpBits) - 1)
71
72 /*
73 * Opcode effective-address decode tables.
74 * Note that we only emulate instructions that have at least one memory
75 * operand (excluding implicit stack references). We assume that stack
76 * references and instruction fetches will never occur in special memory
77 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
78 * not be handled.
79 */
80
81 /* Operand sizes: 8-bit operands or specified/overridden size. */
82 #define ByteOp (1<<0) /* 8-bit operands. */
83 /* Destination operand type. */
84 #define DstShift 1
85 #define ImplicitOps (OpImplicit << DstShift)
86 #define DstReg (OpReg << DstShift)
87 #define DstMem (OpMem << DstShift)
88 #define DstAcc (OpAcc << DstShift)
89 #define DstDI (OpDI << DstShift)
90 #define DstMem64 (OpMem64 << DstShift)
91 #define DstMem16 (OpMem16 << DstShift)
92 #define DstImmUByte (OpImmUByte << DstShift)
93 #define DstDX (OpDX << DstShift)
94 #define DstAccLo (OpAccLo << DstShift)
95 #define DstMask (OpMask << DstShift)
96 /* Source operand type. */
97 #define SrcShift 6
98 #define SrcNone (OpNone << SrcShift)
99 #define SrcReg (OpReg << SrcShift)
100 #define SrcMem (OpMem << SrcShift)
101 #define SrcMem16 (OpMem16 << SrcShift)
102 #define SrcMem32 (OpMem32 << SrcShift)
103 #define SrcImm (OpImm << SrcShift)
104 #define SrcImmByte (OpImmByte << SrcShift)
105 #define SrcOne (OpOne << SrcShift)
106 #define SrcImmUByte (OpImmUByte << SrcShift)
107 #define SrcImmU (OpImmU << SrcShift)
108 #define SrcSI (OpSI << SrcShift)
109 #define SrcXLat (OpXLat << SrcShift)
110 #define SrcImmFAddr (OpImmFAddr << SrcShift)
111 #define SrcMemFAddr (OpMemFAddr << SrcShift)
112 #define SrcAcc (OpAcc << SrcShift)
113 #define SrcImmU16 (OpImmU16 << SrcShift)
114 #define SrcImm64 (OpImm64 << SrcShift)
115 #define SrcDX (OpDX << SrcShift)
116 #define SrcMem8 (OpMem8 << SrcShift)
117 #define SrcAccHi (OpAccHi << SrcShift)
118 #define SrcMask (OpMask << SrcShift)
119 #define BitOp (1<<11)
120 #define MemAbs (1<<12) /* Memory operand is absolute displacement */
121 #define String (1<<13) /* String instruction (rep capable) */
122 #define Stack (1<<14) /* Stack instruction (push/pop) */
123 #define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
124 #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
125 #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
126 #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
127 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
128 #define Escape (5<<15) /* Escape to coprocessor instruction */
129 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
130 #define ModeDual (7<<15) /* Different instruction for 32/64 bit */
131 #define Sse (1<<18) /* SSE Vector instruction */
132 /* Generic ModRM decode. */
133 #define ModRM (1<<19)
134 /* Destination is only written; never read. */
135 #define Mov (1<<20)
136 /* Misc flags */
137 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
138 #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
139 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
140 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
141 #define Undefined (1<<25) /* No Such Instruction */
142 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
143 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
144 #define No64 (1<<28)
145 #define PageTable (1 << 29) /* instruction used to write page table */
146 #define NotImpl (1 << 30) /* instruction is not implemented */
147 /* Source 2 operand type */
148 #define Src2Shift (31)
149 #define Src2None (OpNone << Src2Shift)
150 #define Src2Mem (OpMem << Src2Shift)
151 #define Src2CL (OpCL << Src2Shift)
152 #define Src2ImmByte (OpImmByte << Src2Shift)
153 #define Src2One (OpOne << Src2Shift)
154 #define Src2Imm (OpImm << Src2Shift)
155 #define Src2ES (OpES << Src2Shift)
156 #define Src2CS (OpCS << Src2Shift)
157 #define Src2SS (OpSS << Src2Shift)
158 #define Src2DS (OpDS << Src2Shift)
159 #define Src2FS (OpFS << Src2Shift)
160 #define Src2GS (OpGS << Src2Shift)
161 #define Src2Mask (OpMask << Src2Shift)
162 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */
163 #define AlignMask ((u64)7 << 41)
164 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
165 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
166 #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
167 #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
168 #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
169 #define NoWrite ((u64)1 << 45) /* No writeback */
170 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
171 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
172 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
173 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
174 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
175 #define NearBranch ((u64)1 << 52) /* Near branches */
176 #define No16 ((u64)1 << 53) /* No 16 bit operand */
177 #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
178 #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operands */
179 #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
180
181 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
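/*
 * Example of how these fields combine (illustrative): an ALU opcode such
 * as 00 /r (ADD r/m8, r8) is described in the opcode tables further down
 * in this file roughly as ByteOp | DstMem | SrcReg | ModRM | Lock, i.e.
 * OpMem packed into the destination field and OpReg into the source field
 * of the 64-bit flags word.
 */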
182
183 #define X2(x...) x, x
184 #define X3(x...) X2(x), x
185 #define X4(x...) X2(x), X2(x)
186 #define X5(x...) X4(x), x
187 #define X6(x...) X4(x), X2(x)
188 #define X7(x...) X4(x), X3(x)
189 #define X8(x...) X4(x), X4(x)
190 #define X16(x...) X8(x), X8(x)
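/*
 * The X* macros simply repeat their argument, e.g. X4(foo) expands to
 * "foo, foo, foo, foo". They are used to fill runs of identical entries
 * in the opcode tables further down in this file (for instance the
 * sixteen Jcc or SETcc slots).
 */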
191
192 struct opcode {
193 u64 flags;
194 u8 intercept;
195 u8 pad[7];
196 union {
197 int (*execute)(struct x86_emulate_ctxt *ctxt);
198 const struct opcode *group;
199 const struct group_dual *gdual;
200 const struct gprefix *gprefix;
201 const struct escape *esc;
202 const struct instr_dual *idual;
203 const struct mode_dual *mdual;
204 void (*fastop)(struct fastop *fake);
205 } u;
206 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
207 };
208
209 struct group_dual {
210 struct opcode mod012[8];
211 struct opcode mod3[8];
212 };
213
214 struct gprefix {
215 struct opcode pfx_no;
216 struct opcode pfx_66;
217 struct opcode pfx_f2;
218 struct opcode pfx_f3;
219 };
220
221 struct escape {
222 struct opcode op[8];
223 struct opcode high[64];
224 };
225
226 struct instr_dual {
227 struct opcode mod012;
228 struct opcode mod3;
229 };
230
231 struct mode_dual {
232 struct opcode mode32;
233 struct opcode mode64;
234 };
235
236 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
237
238 enum x86_transfer_type {
239 X86_TRANSFER_NONE,
240 X86_TRANSFER_CALL_JMP,
241 X86_TRANSFER_RET,
242 X86_TRANSFER_TASK_SWITCH,
243 };
244
245 static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
246 {
247 if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
248 nr &= NR_EMULATOR_GPRS - 1;
249
250 if (!(ctxt->regs_valid & (1 << nr))) {
251 ctxt->regs_valid |= 1 << nr;
252 ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
253 }
254 return ctxt->_regs[nr];
255 }
256
257 static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
258 {
259 if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
260 nr &= NR_EMULATOR_GPRS - 1;
261
262 BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
263 BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
264
265 ctxt->regs_valid |= 1 << nr;
266 ctxt->regs_dirty |= 1 << nr;
267 return &ctxt->_regs[nr];
268 }
269
270 static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
271 {
272 reg_read(ctxt, nr);
273 return reg_write(ctxt, nr);
274 }
275
276 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
277 {
278 unsigned long dirty = ctxt->regs_dirty;
279 unsigned reg;
280
281 for_each_set_bit(reg, &dirty, NR_EMULATOR_GPRS)
282 ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
283 }
284
285 static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
286 {
287 ctxt->regs_dirty = 0;
288 ctxt->regs_valid = 0;
289 }
290
291 /*
292 * These EFLAGS bits are restored from saved value during emulation, and
293 * any changes are written back to the saved value after emulation.
294 */
295 #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
296 X86_EFLAGS_PF|X86_EFLAGS_CF)
297
298 #ifdef CONFIG_X86_64
299 #define ON64(x) x
300 #else
301 #define ON64(x)
302 #endif
303
304 /*
305 * fastop functions have a special calling convention:
306 *
307 * dst: rax (in/out)
308 * src: rdx (in/out)
309 * src2: rcx (in)
310 * flags: rflags (in/out)
311 * ex: rsi (in:fastop pointer, out:zero if exception)
312 *
313 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
314 * different operand sizes can be reached by calculation, rather than a jump
315 * table (which would be bigger than the code).
316 *
317 * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
318 * and 1 for the straight line speculation INT3, leaves 7 bytes for the
319 * body of the function. Currently none is larger than 4.
320 */
321 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
322
323 #define FASTOP_SIZE 16
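/*
 * Rough layout sketch: each FASTOPn() invocation below places the byte,
 * word, dword and (on 64-bit) qword variants of an operation at
 * consecutive FASTOP_SIZE offsets from the em_<op> symbol, so fastop()
 * can locate the variant for a given operand size as roughly
 * em_<op> + __ffs(operand_bytes) * FASTOP_SIZE instead of using a table.
 */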
324
325 #define __FOP_FUNC(name) \
326 ".align " __stringify(FASTOP_SIZE) " \n\t" \
327 ".type " name ", @function \n\t" \
328 name ":\n\t" \
329 ASM_ENDBR \
330 IBT_NOSEAL(name)
331
332 #define FOP_FUNC(name) \
333 __FOP_FUNC(#name)
334
335 #define __FOP_RET(name) \
336 "11: " ASM_RET \
337 ".size " name ", .-" name "\n\t"
338
339 #define FOP_RET(name) \
340 __FOP_RET(#name)
341
342 #define __FOP_START(op, align) \
343 extern void em_##op(struct fastop *fake); \
344 asm(".pushsection .text, \"ax\" \n\t" \
345 ".global em_" #op " \n\t" \
346 ".align " __stringify(align) " \n\t" \
347 "em_" #op ":\n\t"
348
349 #define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
350
351 #define FOP_END \
352 ".popsection")
353
354 #define __FOPNOP(name) \
355 __FOP_FUNC(name) \
356 __FOP_RET(name)
357
358 #define FOPNOP() \
359 __FOPNOP(__stringify(__UNIQUE_ID(nop)))
360
361 #define FOP1E(op, dst) \
362 __FOP_FUNC(#op "_" #dst) \
363 "10: " #op " %" #dst " \n\t" \
364 __FOP_RET(#op "_" #dst)
365
366 #define FOP1EEX(op, dst) \
367 FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)
368
369 #define FASTOP1(op) \
370 FOP_START(op) \
371 FOP1E(op##b, al) \
372 FOP1E(op##w, ax) \
373 FOP1E(op##l, eax) \
374 ON64(FOP1E(op##q, rax)) \
375 FOP_END
376
377 /* 1-operand, using src2 (for MUL/DIV r/m) */
378 #define FASTOP1SRC2(op, name) \
379 FOP_START(name) \
380 FOP1E(op, cl) \
381 FOP1E(op, cx) \
382 FOP1E(op, ecx) \
383 ON64(FOP1E(op, rcx)) \
384 FOP_END
385
386 /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
387 #define FASTOP1SRC2EX(op, name) \
388 FOP_START(name) \
389 FOP1EEX(op, cl) \
390 FOP1EEX(op, cx) \
391 FOP1EEX(op, ecx) \
392 ON64(FOP1EEX(op, rcx)) \
393 FOP_END
394
395 #define FOP2E(op, dst, src) \
396 __FOP_FUNC(#op "_" #dst "_" #src) \
397 #op " %" #src ", %" #dst " \n\t" \
398 __FOP_RET(#op "_" #dst "_" #src)
399
400 #define FASTOP2(op) \
401 FOP_START(op) \
402 FOP2E(op##b, al, dl) \
403 FOP2E(op##w, ax, dx) \
404 FOP2E(op##l, eax, edx) \
405 ON64(FOP2E(op##q, rax, rdx)) \
406 FOP_END
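/*
 * For illustration, FASTOP2(add) further down emits an em_add entry point
 * followed by 16-byte-aligned "addb %dl, %al", "addw %dx, %ax",
 * "addl %edx, %eax" and (on 64-bit) "addq %rdx, %rax" stubs, each ending
 * in a RET, matching the register calling convention documented above.
 */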
407
408 /* 2 operand, word only */
409 #define FASTOP2W(op) \
410 FOP_START(op) \
411 FOPNOP() \
412 FOP2E(op##w, ax, dx) \
413 FOP2E(op##l, eax, edx) \
414 ON64(FOP2E(op##q, rax, rdx)) \
415 FOP_END
416
417 /* 2 operand, src is CL */
418 #define FASTOP2CL(op) \
419 FOP_START(op) \
420 FOP2E(op##b, al, cl) \
421 FOP2E(op##w, ax, cl) \
422 FOP2E(op##l, eax, cl) \
423 ON64(FOP2E(op##q, rax, cl)) \
424 FOP_END
425
426 /* 2 operand, src and dest are reversed */
427 #define FASTOP2R(op, name) \
428 FOP_START(name) \
429 FOP2E(op##b, dl, al) \
430 FOP2E(op##w, dx, ax) \
431 FOP2E(op##l, edx, eax) \
432 ON64(FOP2E(op##q, rdx, rax)) \
433 FOP_END
434
435 #define FOP3E(op, dst, src, src2) \
436 __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
437 #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
438 __FOP_RET(#op "_" #dst "_" #src "_" #src2)
439
440 /* 3-operand, word-only, src2=cl */
441 #define FASTOP3WCL(op) \
442 FOP_START(op) \
443 FOPNOP() \
444 FOP3E(op##w, ax, dx, cl) \
445 FOP3E(op##l, eax, edx, cl) \
446 ON64(FOP3E(op##q, rax, rdx, cl)) \
447 FOP_END
448
449 /* Special case for SETcc - 1 instruction per cc */
450 #define FOP_SETCC(op) \
451 FOP_FUNC(op) \
452 #op " %al \n\t" \
453 FOP_RET(op)
454
455 FOP_START(setcc)
456 FOP_SETCC(seto)
457 FOP_SETCC(setno)
458 FOP_SETCC(setc)
459 FOP_SETCC(setnc)
460 FOP_SETCC(setz)
461 FOP_SETCC(setnz)
462 FOP_SETCC(setbe)
463 FOP_SETCC(setnbe)
464 FOP_SETCC(sets)
465 FOP_SETCC(setns)
466 FOP_SETCC(setp)
467 FOP_SETCC(setnp)
468 FOP_SETCC(setl)
469 FOP_SETCC(setnl)
470 FOP_SETCC(setle)
471 FOP_SETCC(setnle)
472 FOP_END;
473
474 FOP_START(salc)
475 FOP_FUNC(salc)
476 "pushf; sbb %al, %al; popf \n\t"
477 FOP_RET(salc)
478 FOP_END;
479
480 /*
481 * XXX: inoutclob user must know where the argument is being expanded.
482 * Using asm goto would allow us to remove _fault.
483 */
484 #define asm_safe(insn, inoutclob...) \
485 ({ \
486 int _fault = 0; \
487 \
488 asm volatile("1:" insn "\n" \
489 "2:\n" \
490 _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
491 : [_fault] "+r"(_fault) inoutclob ); \
492 \
493 _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
494 })
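/*
 * Illustrative use (roughly how the x87 fault flush later in this file
 * uses it):
 *	rc = asm_safe("fwait");
 * yields X86EMUL_UNHANDLEABLE if the instruction faults and
 * X86EMUL_CONTINUE otherwise.
 */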
495
496 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
497 enum x86_intercept intercept,
498 enum x86_intercept_stage stage)
499 {
500 struct x86_instruction_info info = {
501 .intercept = intercept,
502 .rep_prefix = ctxt->rep_prefix,
503 .modrm_mod = ctxt->modrm_mod,
504 .modrm_reg = ctxt->modrm_reg,
505 .modrm_rm = ctxt->modrm_rm,
506 .src_val = ctxt->src.val64,
507 .dst_val = ctxt->dst.val64,
508 .src_bytes = ctxt->src.bytes,
509 .dst_bytes = ctxt->dst.bytes,
510 .ad_bytes = ctxt->ad_bytes,
511 .next_rip = ctxt->eip,
512 };
513
514 return ctxt->ops->intercept(ctxt, &info, stage);
515 }
516
517 static void assign_masked(ulong *dest, ulong src, ulong mask)
518 {
519 *dest = (*dest & ~mask) | (src & mask);
520 }
521
522 static void assign_register(unsigned long *reg, u64 val, int bytes)
523 {
524 /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
525 switch (bytes) {
526 case 1:
527 *(u8 *)reg = (u8)val;
528 break;
529 case 2:
530 *(u16 *)reg = (u16)val;
531 break;
532 case 4:
533 *reg = (u32)val;
534 break; /* 64b: zero-extend */
535 case 8:
536 *reg = val;
537 break;
538 }
539 }
540
541 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
542 {
543 return (1UL << (ctxt->ad_bytes << 3)) - 1;
544 }
545
546 static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
547 {
548 u16 sel;
549 struct desc_struct ss;
550
551 if (ctxt->mode == X86EMUL_MODE_PROT64)
552 return ~0UL;
553 ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
554 return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
555 }
556
557 static int stack_size(struct x86_emulate_ctxt *ctxt)
558 {
559 return (__fls(stack_mask(ctxt)) + 1) >> 3;
560 }
561
562 /* Access/update address held in a register, based on addressing mode. */
563 static inline unsigned long
564 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
565 {
566 if (ctxt->ad_bytes == sizeof(unsigned long))
567 return reg;
568 else
569 return reg & ad_mask(ctxt);
570 }
571
572 static inline unsigned long
573 register_address(struct x86_emulate_ctxt *ctxt, int reg)
574 {
575 return address_mask(ctxt, reg_read(ctxt, reg));
576 }
577
578 static void masked_increment(ulong *reg, ulong mask, int inc)
579 {
580 assign_masked(reg, *reg + inc, mask);
581 }
582
583 static inline void
584 register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
585 {
586 ulong *preg = reg_rmw(ctxt, reg);
587
588 assign_register(preg, *preg + inc, ctxt->ad_bytes);
589 }
590
591 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
592 {
593 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
594 }
595
596 static u32 desc_limit_scaled(struct desc_struct *desc)
597 {
598 u32 limit = get_desc_limit(desc);
599
600 return desc->g ? (limit << 12) | 0xfff : limit;
601 }
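/*
 * With desc->g set, the 20-bit limit is in 4K units, so e.g. a raw limit
 * of 0xfffff scales to 0xffffffff (the usual flat 4GiB segment); with g
 * clear the limit is already in bytes.
 */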
602
603 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
604 {
605 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
606 return 0;
607
608 return ctxt->ops->get_cached_segment_base(ctxt, seg);
609 }
610
611 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
612 u32 error, bool valid)
613 {
614 if (KVM_EMULATOR_BUG_ON(vec > 0x1f, ctxt))
615 return X86EMUL_UNHANDLEABLE;
616
617 ctxt->exception.vector = vec;
618 ctxt->exception.error_code = error;
619 ctxt->exception.error_code_valid = valid;
620 return X86EMUL_PROPAGATE_FAULT;
621 }
622
623 static int emulate_db(struct x86_emulate_ctxt *ctxt)
624 {
625 return emulate_exception(ctxt, DB_VECTOR, 0, false);
626 }
627
628 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
629 {
630 return emulate_exception(ctxt, GP_VECTOR, err, true);
631 }
632
633 static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
634 {
635 return emulate_exception(ctxt, SS_VECTOR, err, true);
636 }
637
638 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
639 {
640 return emulate_exception(ctxt, UD_VECTOR, 0, false);
641 }
642
643 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
644 {
645 return emulate_exception(ctxt, TS_VECTOR, err, true);
646 }
647
648 static int emulate_de(struct x86_emulate_ctxt *ctxt)
649 {
650 return emulate_exception(ctxt, DE_VECTOR, 0, false);
651 }
652
653 static int emulate_nm(struct x86_emulate_ctxt *ctxt)
654 {
655 return emulate_exception(ctxt, NM_VECTOR, 0, false);
656 }
657
658 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
659 {
660 u16 selector;
661 struct desc_struct desc;
662
663 ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
664 return selector;
665 }
666
667 static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
668 unsigned seg)
669 {
670 u16 dummy;
671 u32 base3;
672 struct desc_struct desc;
673
674 ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
675 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
676 }
677
678 static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
679 {
680 return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
681 }
682
683 static inline bool emul_is_noncanonical_address(u64 la,
684 struct x86_emulate_ctxt *ctxt)
685 {
686 return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
687 }
688
689 /*
690 * x86 defines three classes of vector instructions: explicitly
691 * aligned, explicitly unaligned, and the rest, which change behaviour
692 * depending on whether they're AVX encoded or not.
693 *
694 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
695 * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
696 * 512 bytes of data must be aligned to a 16 byte boundary.
697 */
698 static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
699 {
700 u64 alignment = ctxt->d & AlignMask;
701
702 if (likely(size < 16))
703 return 1;
704
705 switch (alignment) {
706 case Unaligned:
707 case Avx:
708 return 1;
709 case Aligned16:
710 return 16;
711 case Aligned:
712 default:
713 return size;
714 }
715 }
716
717 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
718 struct segmented_address addr,
719 unsigned *max_size, unsigned size,
720 bool write, bool fetch,
721 enum x86emul_mode mode, ulong *linear)
722 {
723 struct desc_struct desc;
724 bool usable;
725 ulong la;
726 u32 lim;
727 u16 sel;
728 u8 va_bits;
729
730 la = seg_base(ctxt, addr.seg) + addr.ea;
731 *max_size = 0;
732 switch (mode) {
733 case X86EMUL_MODE_PROT64:
734 *linear = la;
735 va_bits = ctxt_virt_addr_bits(ctxt);
736 if (!__is_canonical_address(la, va_bits))
737 goto bad;
738
739 *max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
740 if (size > *max_size)
741 goto bad;
742 break;
743 default:
744 *linear = la = (u32)la;
745 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
746 addr.seg);
747 if (!usable)
748 goto bad;
749 /* code segment in protected mode or read-only data segment */
750 if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
751 || !(desc.type & 2)) && write)
752 goto bad;
753 /* unreadable code segment */
754 if (!fetch && (desc.type & 8) && !(desc.type & 2))
755 goto bad;
756 lim = desc_limit_scaled(&desc);
757 if (!(desc.type & 8) && (desc.type & 4)) {
758 /* expand-down segment */
759 if (addr.ea <= lim)
760 goto bad;
761 lim = desc.d ? 0xffffffff : 0xffff;
762 }
763 if (addr.ea > lim)
764 goto bad;
765 if (lim == 0xffffffff)
766 *max_size = ~0u;
767 else {
768 *max_size = (u64)lim + 1 - addr.ea;
769 if (size > *max_size)
770 goto bad;
771 }
772 break;
773 }
774 if (la & (insn_alignment(ctxt, size) - 1))
775 return emulate_gp(ctxt, 0);
776 return X86EMUL_CONTINUE;
777 bad:
778 if (addr.seg == VCPU_SREG_SS)
779 return emulate_ss(ctxt, 0);
780 else
781 return emulate_gp(ctxt, 0);
782 }
783
784 static int linearize(struct x86_emulate_ctxt *ctxt,
785 struct segmented_address addr,
786 unsigned size, bool write,
787 ulong *linear)
788 {
789 unsigned max_size;
790 return __linearize(ctxt, addr, &max_size, size, write, false,
791 ctxt->mode, linear);
792 }
793
794 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
795 {
796 ulong linear;
797 int rc;
798 unsigned max_size;
799 struct segmented_address addr = { .seg = VCPU_SREG_CS,
800 .ea = dst };
801
802 if (ctxt->op_bytes != sizeof(unsigned long))
803 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
804 rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
805 if (rc == X86EMUL_CONTINUE)
806 ctxt->_eip = addr.ea;
807 return rc;
808 }
809
810 static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
811 {
812 u64 efer;
813 struct desc_struct cs;
814 u16 selector;
815 u32 base3;
816
817 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
818
819 if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
820 /* Real mode. cpu must not have long mode active */
821 if (efer & EFER_LMA)
822 return X86EMUL_UNHANDLEABLE;
823 ctxt->mode = X86EMUL_MODE_REAL;
824 return X86EMUL_CONTINUE;
825 }
826
827 if (ctxt->eflags & X86_EFLAGS_VM) {
828 /* Protected/VM86 mode. cpu must not have long mode active */
829 if (efer & EFER_LMA)
830 return X86EMUL_UNHANDLEABLE;
831 ctxt->mode = X86EMUL_MODE_VM86;
832 return X86EMUL_CONTINUE;
833 }
834
835 if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
836 return X86EMUL_UNHANDLEABLE;
837
838 if (efer & EFER_LMA) {
839 if (cs.l) {
840 /* Proper long mode */
841 ctxt->mode = X86EMUL_MODE_PROT64;
842 } else if (cs.d) {
843 /* 32-bit compatibility mode */
844 ctxt->mode = X86EMUL_MODE_PROT32;
845 } else {
846 ctxt->mode = X86EMUL_MODE_PROT16;
847 }
848 } else {
849 /* Legacy 32 bit / 16 bit mode */
850 ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
851 }
852
853 return X86EMUL_CONTINUE;
854 }
855
856 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
857 {
858 return assign_eip(ctxt, dst);
859 }
860
861 static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
862 {
863 int rc = emulator_recalc_and_set_mode(ctxt);
864
865 if (rc != X86EMUL_CONTINUE)
866 return rc;
867
868 return assign_eip(ctxt, dst);
869 }
870
871 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
872 {
873 return assign_eip_near(ctxt, ctxt->_eip + rel);
874 }
875
876 static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
877 void *data, unsigned size)
878 {
879 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
880 }
881
882 static int linear_write_system(struct x86_emulate_ctxt *ctxt,
883 ulong linear, void *data,
884 unsigned int size)
885 {
886 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
887 }
888
889 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
890 struct segmented_address addr,
891 void *data,
892 unsigned size)
893 {
894 int rc;
895 ulong linear;
896
897 rc = linearize(ctxt, addr, size, false, &linear);
898 if (rc != X86EMUL_CONTINUE)
899 return rc;
900 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
901 }
902
903 static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
904 struct segmented_address addr,
905 void *data,
906 unsigned int size)
907 {
908 int rc;
909 ulong linear;
910
911 rc = linearize(ctxt, addr, size, true, &linear);
912 if (rc != X86EMUL_CONTINUE)
913 return rc;
914 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
915 }
916
917 /*
918 * Prefetch the remaining bytes of the instruction without crossing page
919 * boundary if they are not in fetch_cache yet.
920 */
921 static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
922 {
923 int rc;
924 unsigned size, max_size;
925 unsigned long linear;
926 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
927 struct segmented_address addr = { .seg = VCPU_SREG_CS,
928 .ea = ctxt->eip + cur_size };
929
930 /*
931 * We do not know exactly how many bytes will be needed, and
932 * __linearize is expensive, so fetch as much as possible. We
933 * just have to avoid going beyond the 15 byte limit, the end
934 * of the segment, or the end of the page.
935 *
936 * __linearize is called with size 0 so that it does not do any
937 * boundary check itself. Instead, we use max_size to check
938 * against op_size.
939 */
940 rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
941 &linear);
942 if (unlikely(rc != X86EMUL_CONTINUE))
943 return rc;
944
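/* cur_size never exceeds 15 here, so 15UL ^ cur_size == 15 - cur_size. */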
945 size = min_t(unsigned, 15UL ^ cur_size, max_size);
946 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
947
948 /*
949 * An instruction can straddle at most two pages, and the first
950 * chunk was already loaded at the beginning of x86_decode_insn.
951 * So if there still are not enough bytes, we must have hit the
952 * 15-byte instruction-length limit.
953 */
954 if (unlikely(size < op_size))
955 return emulate_gp(ctxt, 0);
956
957 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
958 size, &ctxt->exception);
959 if (unlikely(rc != X86EMUL_CONTINUE))
960 return rc;
961 ctxt->fetch.end += size;
962 return X86EMUL_CONTINUE;
963 }
964
965 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
966 unsigned size)
967 {
968 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
969
970 if (unlikely(done_size < size))
971 return __do_insn_fetch_bytes(ctxt, size - done_size);
972 else
973 return X86EMUL_CONTINUE;
974 }
975
976 /* Fetch next part of the instruction being emulated. */
977 #define insn_fetch(_type, _ctxt) \
978 ({ _type _x; \
979 \
980 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
981 if (rc != X86EMUL_CONTINUE) \
982 goto done; \
983 ctxt->_eip += sizeof(_type); \
984 memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
985 ctxt->fetch.ptr += sizeof(_type); \
986 _x; \
987 })
988
989 #define insn_fetch_arr(_arr, _size, _ctxt) \
990 ({ \
991 rc = do_insn_fetch_bytes(_ctxt, _size); \
992 if (rc != X86EMUL_CONTINUE) \
993 goto done; \
994 ctxt->_eip += (_size); \
995 memcpy(_arr, ctxt->fetch.ptr, _size); \
996 ctxt->fetch.ptr += (_size); \
997 })
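/*
 * Typical use in the decoder below, e.g. when pulling a ModRM displacement:
 *	modrm_ea += insn_fetch(s8, ctxt);
 * Both macros assume a local "rc" variable and bail out to a local "done"
 * label on a failed fetch, so they can only be used in functions that
 * provide both.
 */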
998
999 /*
1000 * Given the 'reg' portion of a ModRM byte, and a register block, return a
1001 * pointer into the block that addresses the relevant register.
1002 * @byteop, together with the absence of a REX prefix, selects the AH,CH,DH,BH encodings.
1003 */
1004 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
1005 int byteop)
1006 {
1007 void *p;
1008 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
1009
1010 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
1011 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
1012 else
1013 p = reg_rmw(ctxt, modrm_reg);
1014 return p;
1015 }
1016
1017 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
1018 struct segmented_address addr,
1019 u16 *size, unsigned long *address, int op_bytes)
1020 {
1021 int rc;
1022
1023 if (op_bytes == 2)
1024 op_bytes = 3;
1025 *address = 0;
1026 rc = segmented_read_std(ctxt, addr, size, 2);
1027 if (rc != X86EMUL_CONTINUE)
1028 return rc;
1029 addr.ea += 2;
1030 rc = segmented_read_std(ctxt, addr, address, op_bytes);
1031 return rc;
1032 }
1033
1034 FASTOP2(add);
1035 FASTOP2(or);
1036 FASTOP2(adc);
1037 FASTOP2(sbb);
1038 FASTOP2(and);
1039 FASTOP2(sub);
1040 FASTOP2(xor);
1041 FASTOP2(cmp);
1042 FASTOP2(test);
1043
1044 FASTOP1SRC2(mul, mul_ex);
1045 FASTOP1SRC2(imul, imul_ex);
1046 FASTOP1SRC2EX(div, div_ex);
1047 FASTOP1SRC2EX(idiv, idiv_ex);
1048
1049 FASTOP3WCL(shld);
1050 FASTOP3WCL(shrd);
1051
1052 FASTOP2W(imul);
1053
1054 FASTOP1(not);
1055 FASTOP1(neg);
1056 FASTOP1(inc);
1057 FASTOP1(dec);
1058
1059 FASTOP2CL(rol);
1060 FASTOP2CL(ror);
1061 FASTOP2CL(rcl);
1062 FASTOP2CL(rcr);
1063 FASTOP2CL(shl);
1064 FASTOP2CL(shr);
1065 FASTOP2CL(sar);
1066
1067 FASTOP2W(bsf);
1068 FASTOP2W(bsr);
1069 FASTOP2W(bt);
1070 FASTOP2W(bts);
1071 FASTOP2W(btr);
1072 FASTOP2W(btc);
1073
1074 FASTOP2(xadd);
1075
1076 FASTOP2R(cmp, cmp_r);
1077
1078 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
1079 {
1080 /* If src is zero, do not writeback, but update flags */
1081 if (ctxt->src.val == 0)
1082 ctxt->dst.type = OP_NONE;
1083 return fastop(ctxt, em_bsf);
1084 }
1085
1086 static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1087 {
1088 /* If src is zero, do not writeback, but update flags */
1089 if (ctxt->src.val == 0)
1090 ctxt->dst.type = OP_NONE;
1091 return fastop(ctxt, em_bsr);
1092 }
1093
1094 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1095 {
1096 u8 rc;
1097 void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);
1098
1099 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1100 asm("push %[flags]; popf; " CALL_NOSPEC
1101 : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
1102 return rc;
1103 }
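/*
 * test_cc() indexes straight into the SETcc stubs above: e.g.
 * test_cc(0x4, flags) ends up running "setz %al" against the guest's
 * arithmetic flags, which is how the Jcc/SETcc/CMOVcc handling elsewhere
 * in this file decides whether a condition holds.
 */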
1104
1105 static void fetch_register_operand(struct operand *op)
1106 {
1107 switch (op->bytes) {
1108 case 1:
1109 op->val = *(u8 *)op->addr.reg;
1110 break;
1111 case 2:
1112 op->val = *(u16 *)op->addr.reg;
1113 break;
1114 case 4:
1115 op->val = *(u32 *)op->addr.reg;
1116 break;
1117 case 8:
1118 op->val = *(u64 *)op->addr.reg;
1119 break;
1120 }
1121 }
1122
1123 static int em_fninit(struct x86_emulate_ctxt *ctxt)
1124 {
1125 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1126 return emulate_nm(ctxt);
1127
1128 kvm_fpu_get();
1129 asm volatile("fninit");
1130 kvm_fpu_put();
1131 return X86EMUL_CONTINUE;
1132 }
1133
1134 static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1135 {
1136 u16 fcw;
1137
1138 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1139 return emulate_nm(ctxt);
1140
1141 kvm_fpu_get();
1142 asm volatile("fnstcw %0": "+m"(fcw));
1143 kvm_fpu_put();
1144
1145 ctxt->dst.val = fcw;
1146
1147 return X86EMUL_CONTINUE;
1148 }
1149
1150 static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1151 {
1152 u16 fsw;
1153
1154 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1155 return emulate_nm(ctxt);
1156
1157 kvm_fpu_get();
1158 asm volatile("fnstsw %0": "+m"(fsw));
1159 kvm_fpu_put();
1160
1161 ctxt->dst.val = fsw;
1162
1163 return X86EMUL_CONTINUE;
1164 }
1165
1166 static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1167 struct operand *op)
1168 {
1169 unsigned int reg;
1170
1171 if (ctxt->d & ModRM)
1172 reg = ctxt->modrm_reg;
1173 else
1174 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1175
1176 if (ctxt->d & Sse) {
1177 op->type = OP_XMM;
1178 op->bytes = 16;
1179 op->addr.xmm = reg;
1180 kvm_read_sse_reg(reg, &op->vec_val);
1181 return;
1182 }
1183 if (ctxt->d & Mmx) {
1184 reg &= 7;
1185 op->type = OP_MM;
1186 op->bytes = 8;
1187 op->addr.mm = reg;
1188 return;
1189 }
1190
1191 op->type = OP_REG;
1192 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1193 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1194
1195 fetch_register_operand(op);
1196 op->orig_val = op->val;
1197 }
1198
1199 static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1200 {
1201 if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1202 ctxt->modrm_seg = VCPU_SREG_SS;
1203 }
1204
1205 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1206 struct operand *op)
1207 {
1208 u8 sib;
1209 int index_reg, base_reg, scale;
1210 int rc = X86EMUL_CONTINUE;
1211 ulong modrm_ea = 0;
1212
1213 ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1214 index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1215 base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1216
1217 ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1218 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1219 ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1220 ctxt->modrm_seg = VCPU_SREG_DS;
1221
1222 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1223 op->type = OP_REG;
1224 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1225 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1226 ctxt->d & ByteOp);
1227 if (ctxt->d & Sse) {
1228 op->type = OP_XMM;
1229 op->bytes = 16;
1230 op->addr.xmm = ctxt->modrm_rm;
1231 kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
1232 return rc;
1233 }
1234 if (ctxt->d & Mmx) {
1235 op->type = OP_MM;
1236 op->bytes = 8;
1237 op->addr.mm = ctxt->modrm_rm & 7;
1238 return rc;
1239 }
1240 fetch_register_operand(op);
1241 return rc;
1242 }
1243
1244 op->type = OP_MEM;
1245
1246 if (ctxt->ad_bytes == 2) {
1247 unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1248 unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1249 unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1250 unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1251
1252 /* 16-bit ModR/M decode. */
1253 switch (ctxt->modrm_mod) {
1254 case 0:
1255 if (ctxt->modrm_rm == 6)
1256 modrm_ea += insn_fetch(u16, ctxt);
1257 break;
1258 case 1:
1259 modrm_ea += insn_fetch(s8, ctxt);
1260 break;
1261 case 2:
1262 modrm_ea += insn_fetch(u16, ctxt);
1263 break;
1264 }
1265 switch (ctxt->modrm_rm) {
1266 case 0:
1267 modrm_ea += bx + si;
1268 break;
1269 case 1:
1270 modrm_ea += bx + di;
1271 break;
1272 case 2:
1273 modrm_ea += bp + si;
1274 break;
1275 case 3:
1276 modrm_ea += bp + di;
1277 break;
1278 case 4:
1279 modrm_ea += si;
1280 break;
1281 case 5:
1282 modrm_ea += di;
1283 break;
1284 case 6:
1285 if (ctxt->modrm_mod != 0)
1286 modrm_ea += bp;
1287 break;
1288 case 7:
1289 modrm_ea += bx;
1290 break;
1291 }
1292 if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1293 (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1294 ctxt->modrm_seg = VCPU_SREG_SS;
1295 modrm_ea = (u16)modrm_ea;
1296 } else {
1297 /* 32/64-bit ModR/M decode. */
1298 if ((ctxt->modrm_rm & 7) == 4) {
1299 sib = insn_fetch(u8, ctxt);
1300 index_reg |= (sib >> 3) & 7;
1301 base_reg |= sib & 7;
1302 scale = sib >> 6;
1303
1304 if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1305 modrm_ea += insn_fetch(s32, ctxt);
1306 else {
1307 modrm_ea += reg_read(ctxt, base_reg);
1308 adjust_modrm_seg(ctxt, base_reg);
1309 /* Increment ESP on POP [ESP] */
1310 if ((ctxt->d & IncSP) &&
1311 base_reg == VCPU_REGS_RSP)
1312 modrm_ea += ctxt->op_bytes;
1313 }
1314 if (index_reg != 4)
1315 modrm_ea += reg_read(ctxt, index_reg) << scale;
1316 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1317 modrm_ea += insn_fetch(s32, ctxt);
1318 if (ctxt->mode == X86EMUL_MODE_PROT64)
1319 ctxt->rip_relative = 1;
1320 } else {
1321 base_reg = ctxt->modrm_rm;
1322 modrm_ea += reg_read(ctxt, base_reg);
1323 adjust_modrm_seg(ctxt, base_reg);
1324 }
1325 switch (ctxt->modrm_mod) {
1326 case 1:
1327 modrm_ea += insn_fetch(s8, ctxt);
1328 break;
1329 case 2:
1330 modrm_ea += insn_fetch(s32, ctxt);
1331 break;
1332 }
1333 }
1334 op->addr.mem.ea = modrm_ea;
1335 if (ctxt->ad_bytes != 8)
1336 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1337
1338 done:
1339 return rc;
1340 }
1341
1342 static int decode_abs(struct x86_emulate_ctxt *ctxt,
1343 struct operand *op)
1344 {
1345 int rc = X86EMUL_CONTINUE;
1346
1347 op->type = OP_MEM;
1348 switch (ctxt->ad_bytes) {
1349 case 2:
1350 op->addr.mem.ea = insn_fetch(u16, ctxt);
1351 break;
1352 case 4:
1353 op->addr.mem.ea = insn_fetch(u32, ctxt);
1354 break;
1355 case 8:
1356 op->addr.mem.ea = insn_fetch(u64, ctxt);
1357 break;
1358 }
1359 done:
1360 return rc;
1361 }
1362
1363 static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1364 {
1365 long sv = 0, mask;
1366
1367 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1368 mask = ~((long)ctxt->dst.bytes * 8 - 1);
1369
1370 if (ctxt->src.bytes == 2)
1371 sv = (s16)ctxt->src.val & (s16)mask;
1372 else if (ctxt->src.bytes == 4)
1373 sv = (s32)ctxt->src.val & (s32)mask;
1374 else
1375 sv = (s64)ctxt->src.val & (s64)mask;
1376
1377 ctxt->dst.addr.mem.ea = address_mask(ctxt,
1378 ctxt->dst.addr.mem.ea + (sv >> 3));
1379 }
1380
1381 /* only subword offset */
1382 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
1383 }
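/*
 * Worked example: for a 32-bit "bt %eax, (mem)" with %eax == 100, sv is
 * rounded down to 96, the effective address moves forward by 96 / 8 == 12
 * bytes, and the remaining in-dword bit index becomes 100 & 31 == 4.
 */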
1384
1385 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1386 unsigned long addr, void *dest, unsigned size)
1387 {
1388 int rc;
1389 struct read_cache *mc = &ctxt->mem_read;
1390
1391 if (mc->pos < mc->end)
1392 goto read_cached;
1393
1394 if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
1395 return X86EMUL_UNHANDLEABLE;
1396
1397 rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1398 &ctxt->exception);
1399 if (rc != X86EMUL_CONTINUE)
1400 return rc;
1401
1402 mc->end += size;
1403
1404 read_cached:
1405 memcpy(dest, mc->data + mc->pos, size);
1406 mc->pos += size;
1407 return X86EMUL_CONTINUE;
1408 }
1409
1410 static int segmented_read(struct x86_emulate_ctxt *ctxt,
1411 struct segmented_address addr,
1412 void *data,
1413 unsigned size)
1414 {
1415 int rc;
1416 ulong linear;
1417
1418 rc = linearize(ctxt, addr, size, false, &linear);
1419 if (rc != X86EMUL_CONTINUE)
1420 return rc;
1421 return read_emulated(ctxt, linear, data, size);
1422 }
1423
1424 static int segmented_write(struct x86_emulate_ctxt *ctxt,
1425 struct segmented_address addr,
1426 const void *data,
1427 unsigned size)
1428 {
1429 int rc;
1430 ulong linear;
1431
1432 rc = linearize(ctxt, addr, size, true, &linear);
1433 if (rc != X86EMUL_CONTINUE)
1434 return rc;
1435 return ctxt->ops->write_emulated(ctxt, linear, data, size,
1436 &ctxt->exception);
1437 }
1438
1439 static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1440 struct segmented_address addr,
1441 const void *orig_data, const void *data,
1442 unsigned size)
1443 {
1444 int rc;
1445 ulong linear;
1446
1447 rc = linearize(ctxt, addr, size, true, &linear);
1448 if (rc != X86EMUL_CONTINUE)
1449 return rc;
1450 return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1451 size, &ctxt->exception);
1452 }
1453
1454 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1455 unsigned int size, unsigned short port,
1456 void *dest)
1457 {
1458 struct read_cache *rc = &ctxt->io_read;
1459
1460 if (rc->pos == rc->end) { /* refill pio read ahead */
1461 unsigned int in_page, n;
1462 unsigned int count = ctxt->rep_prefix ?
1463 address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1464 in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1465 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1466 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1467 n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1468 if (n == 0)
1469 n = 1;
1470 rc->pos = rc->end = 0;
1471 if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1472 return 0;
1473 rc->end = n * size;
1474 }
1475
1476 if (ctxt->rep_prefix && (ctxt->d & String) &&
1477 !(ctxt->eflags & X86_EFLAGS_DF)) {
1478 ctxt->dst.data = rc->data + rc->pos;
1479 ctxt->dst.type = OP_MEM_STR;
1480 ctxt->dst.count = (rc->end - rc->pos) / size;
1481 rc->pos = rc->end;
1482 } else {
1483 memcpy(dest, rc->data + rc->pos, size);
1484 rc->pos += size;
1485 }
1486 return 1;
1487 }
1488
1489 static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1490 u16 index, struct desc_struct *desc)
1491 {
1492 struct desc_ptr dt;
1493 ulong addr;
1494
1495 ctxt->ops->get_idt(ctxt, &dt);
1496
1497 if (dt.size < index * 8 + 7)
1498 return emulate_gp(ctxt, index << 3 | 0x2);
1499
1500 addr = dt.address + index * 8;
1501 return linear_read_system(ctxt, addr, desc, sizeof(*desc));
1502 }
1503
1504 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1505 u16 selector, struct desc_ptr *dt)
1506 {
1507 const struct x86_emulate_ops *ops = ctxt->ops;
1508 u32 base3 = 0;
1509
1510 if (selector & 1 << 2) {
1511 struct desc_struct desc;
1512 u16 sel;
1513
1514 memset(dt, 0, sizeof(*dt));
1515 if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1516 VCPU_SREG_LDTR))
1517 return;
1518
1519 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1520 dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1521 } else
1522 ops->get_gdt(ctxt, dt);
1523 }
1524
1525 static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1526 u16 selector, ulong *desc_addr_p)
1527 {
1528 struct desc_ptr dt;
1529 u16 index = selector >> 3;
1530 ulong addr;
1531
1532 get_descriptor_table_ptr(ctxt, selector, &dt);
1533
1534 if (dt.size < index * 8 + 7)
1535 return emulate_gp(ctxt, selector & 0xfffc);
1536
1537 addr = dt.address + index * 8;
1538
1539 #ifdef CONFIG_X86_64
1540 if (addr >> 32 != 0) {
1541 u64 efer = 0;
1542
1543 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1544 if (!(efer & EFER_LMA))
1545 addr &= (u32)-1;
1546 }
1547 #endif
1548
1549 *desc_addr_p = addr;
1550 return X86EMUL_CONTINUE;
1551 }
1552
1553 /* allowed just for 8-byte segment descriptors */
1554 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1555 u16 selector, struct desc_struct *desc,
1556 ulong *desc_addr_p)
1557 {
1558 int rc;
1559
1560 rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1561 if (rc != X86EMUL_CONTINUE)
1562 return rc;
1563
1564 return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
1565 }
1566
1567 /* allowed just for 8-byte segment descriptors */
1568 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1569 u16 selector, struct desc_struct *desc)
1570 {
1571 int rc;
1572 ulong addr;
1573
1574 rc = get_descriptor_ptr(ctxt, selector, &addr);
1575 if (rc != X86EMUL_CONTINUE)
1576 return rc;
1577
1578 return linear_write_system(ctxt, addr, desc, sizeof(*desc));
1579 }
1580
1581 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1582 u16 selector, int seg, u8 cpl,
1583 enum x86_transfer_type transfer,
1584 struct desc_struct *desc)
1585 {
1586 struct desc_struct seg_desc, old_desc;
1587 u8 dpl, rpl;
1588 unsigned err_vec = GP_VECTOR;
1589 u32 err_code = 0;
1590 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1591 ulong desc_addr;
1592 int ret;
1593 u16 dummy;
1594 u32 base3 = 0;
1595
1596 memset(&seg_desc, 0, sizeof(seg_desc));
1597
1598 if (ctxt->mode == X86EMUL_MODE_REAL) {
1599 /* set real mode segment descriptor (keep limit etc. for
1600 * unreal mode) */
1601 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1602 set_desc_base(&seg_desc, selector << 4);
1603 goto load;
1604 } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1605 /* VM86 needs a clean new segment descriptor */
1606 set_desc_base(&seg_desc, selector << 4);
1607 set_desc_limit(&seg_desc, 0xffff);
1608 seg_desc.type = 3;
1609 seg_desc.p = 1;
1610 seg_desc.s = 1;
1611 seg_desc.dpl = 3;
1612 goto load;
1613 }
1614
1615 rpl = selector & 3;
1616
1617 /* TR should be in GDT only */
1618 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1619 goto exception;
1620
1621 /* NULL selector is not valid for TR, CS and (except for long mode) SS */
1622 if (null_selector) {
1623 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1624 goto exception;
1625
1626 if (seg == VCPU_SREG_SS) {
1627 if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1628 goto exception;
1629
1630 /*
1631 * ctxt->ops->set_segment expects the CPL to be in
1632 * SS.DPL, so fake an expand-up 32-bit data segment.
1633 */
1634 seg_desc.type = 3;
1635 seg_desc.p = 1;
1636 seg_desc.s = 1;
1637 seg_desc.dpl = cpl;
1638 seg_desc.d = 1;
1639 seg_desc.g = 1;
1640 }
1641
1642 /* Skip all following checks */
1643 goto load;
1644 }
1645
1646 ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1647 if (ret != X86EMUL_CONTINUE)
1648 return ret;
1649
1650 err_code = selector & 0xfffc;
1651 err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1652 GP_VECTOR;
1653
1654 /* can't load system descriptor into segment selector */
1655 if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1656 if (transfer == X86_TRANSFER_CALL_JMP)
1657 return X86EMUL_UNHANDLEABLE;
1658 goto exception;
1659 }
1660
1661 dpl = seg_desc.dpl;
1662
1663 switch (seg) {
1664 case VCPU_SREG_SS:
1665 /*
1666 * segment is not a writable data segment, or segment
1667 * selector's RPL != CPL, or descriptor DPL != CPL
1668 */
1669 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1670 goto exception;
1671 break;
1672 case VCPU_SREG_CS:
1673 if (!(seg_desc.type & 8))
1674 goto exception;
1675
1676 if (transfer == X86_TRANSFER_RET) {
1677 /* RET can never return to an inner privilege level. */
1678 if (rpl < cpl)
1679 goto exception;
1680 /* Outer-privilege level return is not implemented */
1681 if (rpl > cpl)
1682 return X86EMUL_UNHANDLEABLE;
1683 }
1684 if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
1685 if (seg_desc.type & 4) {
1686 /* conforming */
1687 if (dpl > rpl)
1688 goto exception;
1689 } else {
1690 /* nonconforming */
1691 if (dpl != rpl)
1692 goto exception;
1693 }
1694 } else { /* X86_TRANSFER_CALL_JMP */
1695 if (seg_desc.type & 4) {
1696 /* conforming */
1697 if (dpl > cpl)
1698 goto exception;
1699 } else {
1700 /* nonconforming */
1701 if (rpl > cpl || dpl != cpl)
1702 goto exception;
1703 }
1704 }
1705 /* in long-mode d/b must be clear if l is set */
1706 if (seg_desc.d && seg_desc.l) {
1707 u64 efer = 0;
1708
1709 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1710 if (efer & EFER_LMA)
1711 goto exception;
1712 }
1713
1714 /* CS(RPL) <- CPL */
1715 selector = (selector & 0xfffc) | cpl;
1716 break;
1717 case VCPU_SREG_TR:
1718 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1719 goto exception;
1720 break;
1721 case VCPU_SREG_LDTR:
1722 if (seg_desc.s || seg_desc.type != 2)
1723 goto exception;
1724 break;
1725 default: /* DS, ES, FS, or GS */
1726 /*
1727 * segment is not a data or readable code segment or
1728 * ((segment is a data or nonconforming code segment)
1729 * and (both RPL and CPL > DPL))
1730 */
1731 if ((seg_desc.type & 0xa) == 0x8 ||
1732 (((seg_desc.type & 0xc) != 0xc) &&
1733 (rpl > dpl && cpl > dpl)))
1734 goto exception;
1735 break;
1736 }
1737
1738 if (!seg_desc.p) {
1739 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1740 goto exception;
1741 }
1742
1743 if (seg_desc.s) {
1744 /* mark segment as accessed */
1745 if (!(seg_desc.type & 1)) {
1746 seg_desc.type |= 1;
1747 ret = write_segment_descriptor(ctxt, selector,
1748 &seg_desc);
1749 if (ret != X86EMUL_CONTINUE)
1750 return ret;
1751 }
1752 } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1753 ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
1754 if (ret != X86EMUL_CONTINUE)
1755 return ret;
1756 if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
1757 ((u64)base3 << 32), ctxt))
1758 return emulate_gp(ctxt, err_code);
1759 }
1760
1761 if (seg == VCPU_SREG_TR) {
1762 old_desc = seg_desc;
1763 seg_desc.type |= 2; /* busy */
1764 ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1765 sizeof(seg_desc), &ctxt->exception);
1766 if (ret != X86EMUL_CONTINUE)
1767 return ret;
1768 }
1769 load:
1770 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1771 if (desc)
1772 *desc = seg_desc;
1773 return X86EMUL_CONTINUE;
1774 exception:
1775 return emulate_exception(ctxt, err_vec, err_code, true);
1776 }
1777
1778 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1779 u16 selector, int seg)
1780 {
1781 u8 cpl = ctxt->ops->cpl(ctxt);
1782
1783 /*
1784 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1785 * they can load it at CPL<3 (Intel's manual says only LSS can,
1786 * but it's wrong).
1787 *
1788 * However, the Intel manual says that putting IST=1/DPL=3 in
1789 * an interrupt gate will result in SS=3 (the AMD manual instead
1790 * says it doesn't), so allow SS=3 in __load_segment_descriptor
1791 * and only forbid it here.
1792 */
1793 if (seg == VCPU_SREG_SS && selector == 3 &&
1794 ctxt->mode == X86EMUL_MODE_PROT64)
1795 return emulate_exception(ctxt, GP_VECTOR, 0, true);
1796
1797 return __load_segment_descriptor(ctxt, selector, seg, cpl,
1798 X86_TRANSFER_NONE, NULL);
1799 }
1800
1801 static void write_register_operand(struct operand *op)
1802 {
1803 return assign_register(op->addr.reg, op->val, op->bytes);
1804 }
1805
1806 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1807 {
1808 switch (op->type) {
1809 case OP_REG:
1810 write_register_operand(op);
1811 break;
1812 case OP_MEM:
1813 if (ctxt->lock_prefix)
1814 return segmented_cmpxchg(ctxt,
1815 op->addr.mem,
1816 &op->orig_val,
1817 &op->val,
1818 op->bytes);
1819 else
1820 return segmented_write(ctxt,
1821 op->addr.mem,
1822 &op->val,
1823 op->bytes);
1824 break;
1825 case OP_MEM_STR:
1826 return segmented_write(ctxt,
1827 op->addr.mem,
1828 op->data,
1829 op->bytes * op->count);
1830 break;
1831 case OP_XMM:
1832 kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1833 break;
1834 case OP_MM:
1835 kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
1836 break;
1837 case OP_NONE:
1838 /* no writeback */
1839 break;
1840 default:
1841 break;
1842 }
1843 return X86EMUL_CONTINUE;
1844 }
1845
1846 static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
1847 {
1848 struct segmented_address addr;
1849
1850 rsp_increment(ctxt, -bytes);
1851 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1852 addr.seg = VCPU_SREG_SS;
1853
1854 return segmented_write(ctxt, addr, data, bytes);
1855 }
1856
1857 static int em_push(struct x86_emulate_ctxt *ctxt)
1858 {
1859 /* Disable writeback. */
1860 ctxt->dst.type = OP_NONE;
1861 return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1862 }
1863
1864 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1865 void *dest, int len)
1866 {
1867 int rc;
1868 struct segmented_address addr;
1869
1870 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1871 addr.seg = VCPU_SREG_SS;
1872 rc = segmented_read(ctxt, addr, dest, len);
1873 if (rc != X86EMUL_CONTINUE)
1874 return rc;
1875
1876 rsp_increment(ctxt, len);
1877 return rc;
1878 }
1879
1880 static int em_pop(struct x86_emulate_ctxt *ctxt)
1881 {
1882 return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1883 }
1884
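/*
 * POPF may only modify a subset of EFLAGS, depending on privilege:
 * IOPL can change only at CPL 0, IF only when CPL <= IOPL, and a VM86
 * task with IOPL < 3 takes #GP instead.  The helper below applies that
 * mask; e.g. at CPL 3 with IOPL 0, a popped value with IF set leaves
 * the saved IF bit of EFLAGS untouched.
 */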
1885 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1886 void *dest, int len)
1887 {
1888 int rc;
1889 unsigned long val, change_mask;
1890 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1891 int cpl = ctxt->ops->cpl(ctxt);
1892
1893 rc = emulate_pop(ctxt, &val, len);
1894 if (rc != X86EMUL_CONTINUE)
1895 return rc;
1896
1897 change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1898 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1899 X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1900 X86_EFLAGS_AC | X86_EFLAGS_ID;
1901
1902 switch(ctxt->mode) {
1903 case X86EMUL_MODE_PROT64:
1904 case X86EMUL_MODE_PROT32:
1905 case X86EMUL_MODE_PROT16:
1906 if (cpl == 0)
1907 change_mask |= X86_EFLAGS_IOPL;
1908 if (cpl <= iopl)
1909 change_mask |= X86_EFLAGS_IF;
1910 break;
1911 case X86EMUL_MODE_VM86:
1912 if (iopl < 3)
1913 return emulate_gp(ctxt, 0);
1914 change_mask |= X86_EFLAGS_IF;
1915 break;
1916 default: /* real mode */
1917 change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1918 break;
1919 }
1920
1921 *(unsigned long *)dest =
1922 (ctxt->eflags & ~change_mask) | (val & change_mask);
1923
1924 return rc;
1925 }
1926
1927 static int em_popf(struct x86_emulate_ctxt *ctxt)
1928 {
1929 ctxt->dst.type = OP_REG;
1930 ctxt->dst.addr.reg = &ctxt->eflags;
1931 ctxt->dst.bytes = ctxt->op_bytes;
1932 return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1933 }
1934
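/*
 * ENTER with a non-zero nesting level is not handled; for level 0 the
 * emulation below is simply "push rBP; rBP = rSP; rSP -= frame_size",
 * with all stack accesses masked to the current stack address size.
 */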
1935 static int em_enter(struct x86_emulate_ctxt *ctxt)
1936 {
1937 int rc;
1938 unsigned frame_size = ctxt->src.val;
1939 unsigned nesting_level = ctxt->src2.val & 31;
1940 ulong rbp;
1941
1942 if (nesting_level)
1943 return X86EMUL_UNHANDLEABLE;
1944
1945 rbp = reg_read(ctxt, VCPU_REGS_RBP);
1946 rc = push(ctxt, &rbp, stack_size(ctxt));
1947 if (rc != X86EMUL_CONTINUE)
1948 return rc;
1949 assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1950 stack_mask(ctxt));
1951 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1952 reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1953 stack_mask(ctxt));
1954 return X86EMUL_CONTINUE;
1955 }
1956
1957 static int em_leave(struct x86_emulate_ctxt *ctxt)
1958 {
1959 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1960 stack_mask(ctxt));
1961 return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1962 }
1963
1964 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1965 {
1966 int seg = ctxt->src2.val;
1967
1968 ctxt->src.val = get_segment_selector(ctxt, seg);
1969 if (ctxt->op_bytes == 4) {
1970 rsp_increment(ctxt, -2);
1971 ctxt->op_bytes = 2;
1972 }
1973
1974 return em_push(ctxt);
1975 }
1976
1977 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1978 {
1979 int seg = ctxt->src2.val;
1980 unsigned long selector;
1981 int rc;
1982
1983 rc = emulate_pop(ctxt, &selector, 2);
1984 if (rc != X86EMUL_CONTINUE)
1985 return rc;
1986
1987 if (seg == VCPU_SREG_SS)
1988 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1989 if (ctxt->op_bytes > 2)
1990 rsp_increment(ctxt, ctxt->op_bytes - 2);
1991
1992 rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1993 return rc;
1994 }
1995
1996 static int em_pusha(struct x86_emulate_ctxt *ctxt)
1997 {
1998 unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1999 int rc = X86EMUL_CONTINUE;
2000 int reg = VCPU_REGS_RAX;
2001
2002 while (reg <= VCPU_REGS_RDI) {
2003 (reg == VCPU_REGS_RSP) ?
2004 (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
2005
2006 rc = em_push(ctxt);
2007 if (rc != X86EMUL_CONTINUE)
2008 return rc;
2009
2010 ++reg;
2011 }
2012
2013 return rc;
2014 }
2015
2016 static int em_pushf(struct x86_emulate_ctxt *ctxt)
2017 {
2018 ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
2019 return em_push(ctxt);
2020 }
2021
2022 static int em_popa(struct x86_emulate_ctxt *ctxt)
2023 {
2024 int rc = X86EMUL_CONTINUE;
2025 int reg = VCPU_REGS_RDI;
2026 u32 val;
2027
2028 while (reg >= VCPU_REGS_RAX) {
2029 if (reg == VCPU_REGS_RSP) {
2030 rsp_increment(ctxt, ctxt->op_bytes);
2031 --reg;
2032 }
2033
2034 rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
2035 if (rc != X86EMUL_CONTINUE)
2036 break;
2037 assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
2038 --reg;
2039 }
2040 return rc;
2041 }
2042
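/*
 * Real-mode interrupt/exception delivery: push FLAGS, CS and IP, clear
 * IF/TF/AC, then fetch the new CS:IP from the 4-byte IVT entry at
 * IDT.base + vector * 4 (IP at offset 0, CS at offset 2).  For example,
 * vector 0x10 is read from linear address IDT.base + 0x40.
 */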
2043 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2044 {
2045 const struct x86_emulate_ops *ops = ctxt->ops;
2046 int rc;
2047 struct desc_ptr dt;
2048 gva_t cs_addr;
2049 gva_t eip_addr;
2050 u16 cs, eip;
2051
2052 /* TODO: Add limit checks */
2053 ctxt->src.val = ctxt->eflags;
2054 rc = em_push(ctxt);
2055 if (rc != X86EMUL_CONTINUE)
2056 return rc;
2057
2058 ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
2059
2060 ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
2061 rc = em_push(ctxt);
2062 if (rc != X86EMUL_CONTINUE)
2063 return rc;
2064
2065 ctxt->src.val = ctxt->_eip;
2066 rc = em_push(ctxt);
2067 if (rc != X86EMUL_CONTINUE)
2068 return rc;
2069
2070 ops->get_idt(ctxt, &dt);
2071
2072 eip_addr = dt.address + (irq << 2);
2073 cs_addr = dt.address + (irq << 2) + 2;
2074
2075 rc = linear_read_system(ctxt, cs_addr, &cs, 2);
2076 if (rc != X86EMUL_CONTINUE)
2077 return rc;
2078
2079 rc = linear_read_system(ctxt, eip_addr, &eip, 2);
2080 if (rc != X86EMUL_CONTINUE)
2081 return rc;
2082
2083 rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2084 if (rc != X86EMUL_CONTINUE)
2085 return rc;
2086
2087 ctxt->_eip = eip;
2088
2089 return rc;
2090 }
2091
2092 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2093 {
2094 int rc;
2095
2096 invalidate_registers(ctxt);
2097 rc = __emulate_int_real(ctxt, irq);
2098 if (rc == X86EMUL_CONTINUE)
2099 writeback_registers(ctxt);
2100 return rc;
2101 }
2102
2103 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2104 {
2105 switch(ctxt->mode) {
2106 case X86EMUL_MODE_REAL:
2107 return __emulate_int_real(ctxt, irq);
2108 case X86EMUL_MODE_VM86:
2109 case X86EMUL_MODE_PROT16:
2110 case X86EMUL_MODE_PROT32:
2111 case X86EMUL_MODE_PROT64:
2112 default:
2113 /* Protected mode interrupts are not implemented yet */
2114 return X86EMUL_UNHANDLEABLE;
2115 }
2116 }
2117
2118 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2119 {
2120 int rc = X86EMUL_CONTINUE;
2121 unsigned long temp_eip = 0;
2122 unsigned long temp_eflags = 0;
2123 unsigned long cs = 0;
2124 unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2125 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2126 X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2127 X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2128 X86_EFLAGS_AC | X86_EFLAGS_ID |
2129 X86_EFLAGS_FIXED;
2130 unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2131 X86_EFLAGS_VIP;
2132
2133 /* TODO: Add stack limit check */
2134
2135 rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2136
2137 if (rc != X86EMUL_CONTINUE)
2138 return rc;
2139
2140 if (temp_eip & ~0xffff)
2141 return emulate_gp(ctxt, 0);
2142
2143 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2144
2145 if (rc != X86EMUL_CONTINUE)
2146 return rc;
2147
2148 rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2149
2150 if (rc != X86EMUL_CONTINUE)
2151 return rc;
2152
2153 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2154
2155 if (rc != X86EMUL_CONTINUE)
2156 return rc;
2157
2158 ctxt->_eip = temp_eip;
2159
2160 if (ctxt->op_bytes == 4)
2161 ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2162 else if (ctxt->op_bytes == 2) {
2163 ctxt->eflags &= ~0xffff;
2164 ctxt->eflags |= temp_eflags;
2165 }
2166
2167 ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2168 ctxt->eflags |= X86_EFLAGS_FIXED;
2169 ctxt->ops->set_nmi_mask(ctxt, false);
2170
2171 return rc;
2172 }
2173
2174 static int em_iret(struct x86_emulate_ctxt *ctxt)
2175 {
2176 switch(ctxt->mode) {
2177 case X86EMUL_MODE_REAL:
2178 return emulate_iret_real(ctxt);
2179 case X86EMUL_MODE_VM86:
2180 case X86EMUL_MODE_PROT16:
2181 case X86EMUL_MODE_PROT32:
2182 case X86EMUL_MODE_PROT64:
2183 default:
2184 /* iret from protected mode is not implemented yet */
2185 return X86EMUL_UNHANDLEABLE;
2186 }
2187 }
2188
2189 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2190 {
2191 int rc;
2192 unsigned short sel;
2193 struct desc_struct new_desc;
2194 u8 cpl = ctxt->ops->cpl(ctxt);
2195
2196 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2197
2198 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2199 X86_TRANSFER_CALL_JMP,
2200 &new_desc);
2201 if (rc != X86EMUL_CONTINUE)
2202 return rc;
2203
2204 rc = assign_eip_far(ctxt, ctxt->src.val);
2205 /* Error handling is not implemented. */
2206 if (rc != X86EMUL_CONTINUE)
2207 return X86EMUL_UNHANDLEABLE;
2208
2209 return rc;
2210 }
2211
2212 static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2213 {
2214 return assign_eip_near(ctxt, ctxt->src.val);
2215 }
2216
2217 static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2218 {
2219 int rc;
2220 long int old_eip;
2221
2222 old_eip = ctxt->_eip;
2223 rc = assign_eip_near(ctxt, ctxt->src.val);
2224 if (rc != X86EMUL_CONTINUE)
2225 return rc;
2226 ctxt->src.val = old_eip;
2227 rc = em_push(ctxt);
2228 return rc;
2229 }
2230
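/*
 * CMPXCHG8B: compare EDX:EAX with the 64-bit destination.  If they are
 * equal, ZF is set and ECX:EBX is stored to the destination; otherwise
 * ZF is cleared and the destination value is loaded into EDX:EAX.  The
 * 16-byte CMPXCHG16B form is not handled here.
 */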
2231 static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2232 {
2233 u64 old = ctxt->dst.orig_val64;
2234
2235 if (ctxt->dst.bytes == 16)
2236 return X86EMUL_UNHANDLEABLE;
2237
2238 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2239 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2240 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2241 *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2242 ctxt->eflags &= ~X86_EFLAGS_ZF;
2243 } else {
2244 ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2245 (u32) reg_read(ctxt, VCPU_REGS_RBX);
2246
2247 ctxt->eflags |= X86_EFLAGS_ZF;
2248 }
2249 return X86EMUL_CONTINUE;
2250 }
2251
2252 static int em_ret(struct x86_emulate_ctxt *ctxt)
2253 {
2254 int rc;
2255 unsigned long eip;
2256
2257 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2258 if (rc != X86EMUL_CONTINUE)
2259 return rc;
2260
2261 return assign_eip_near(ctxt, eip);
2262 }
2263
2264 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2265 {
2266 int rc;
2267 unsigned long eip, cs;
2268 int cpl = ctxt->ops->cpl(ctxt);
2269 struct desc_struct new_desc;
2270
2271 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2272 if (rc != X86EMUL_CONTINUE)
2273 return rc;
2274 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2275 if (rc != X86EMUL_CONTINUE)
2276 return rc;
2277 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2278 X86_TRANSFER_RET,
2279 &new_desc);
2280 if (rc != X86EMUL_CONTINUE)
2281 return rc;
2282 rc = assign_eip_far(ctxt, eip);
2283 /* Error handling is not implemented. */
2284 if (rc != X86EMUL_CONTINUE)
2285 return X86EMUL_UNHANDLEABLE;
2286
2287 return rc;
2288 }
2289
2290 static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2291 {
2292 int rc;
2293
2294 rc = em_ret_far(ctxt);
2295 if (rc != X86EMUL_CONTINUE)
2296 return rc;
2297 rsp_increment(ctxt, ctxt->src.val);
2298 return X86EMUL_CONTINUE;
2299 }
2300
2301 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2302 {
2303 /* Save real source value, then compare EAX against destination. */
2304 ctxt->dst.orig_val = ctxt->dst.val;
2305 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2306 ctxt->src.orig_val = ctxt->src.val;
2307 ctxt->src.val = ctxt->dst.orig_val;
2308 fastop(ctxt, em_cmp);
2309
2310 if (ctxt->eflags & X86_EFLAGS_ZF) {
2311 /* Success: write back to memory; no update of EAX */
2312 ctxt->src.type = OP_NONE;
2313 ctxt->dst.val = ctxt->src.orig_val;
2314 } else {
2315 /* Failure: write the value we saw to EAX. */
2316 ctxt->src.type = OP_REG;
2317 ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2318 ctxt->src.val = ctxt->dst.orig_val;
2319 /* Create write-cycle to dest by writing the same value */
2320 ctxt->dst.val = ctxt->dst.orig_val;
2321 }
2322 return X86EMUL_CONTINUE;
2323 }
2324
2325 static int em_lseg(struct x86_emulate_ctxt *ctxt)
2326 {
2327 int seg = ctxt->src2.val;
2328 unsigned short sel;
2329 int rc;
2330
2331 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2332
2333 rc = load_segment_descriptor(ctxt, sel, seg);
2334 if (rc != X86EMUL_CONTINUE)
2335 return rc;
2336
2337 ctxt->dst.val = ctxt->src.val;
2338 return rc;
2339 }
2340
2341 static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
2342 {
2343 #ifdef CONFIG_X86_64
2344 return ctxt->ops->guest_has_long_mode(ctxt);
2345 #else
2346 return false;
2347 #endif
2348 }
2349
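/*
 * The SMRAM state-save area stores segment attributes as a packed
 * 32-bit word; the helper below unpacks it into a desc_struct:
 * bit 23 = G, 22 = D/B, 21 = L, 20 = AVL, 15 = P, 14:13 = DPL,
 * 12 = S and 11:8 = type.
 */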
2350 static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
2351 {
2352 desc->g = (flags >> 23) & 1;
2353 desc->d = (flags >> 22) & 1;
2354 desc->l = (flags >> 21) & 1;
2355 desc->avl = (flags >> 20) & 1;
2356 desc->p = (flags >> 15) & 1;
2357 desc->dpl = (flags >> 13) & 3;
2358 desc->s = (flags >> 12) & 1;
2359 desc->type = (flags >> 8) & 15;
2360 }
2361
2362 static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
2363 int n)
2364 {
2365 struct desc_struct desc;
2366 int offset;
2367 u16 selector;
2368
2369 selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
2370
2371 if (n < 3)
2372 offset = 0x7f84 + n * 12;
2373 else
2374 offset = 0x7f2c + (n - 3) * 12;
2375
2376 set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
2377 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
2378 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
2379 ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
2380 return X86EMUL_CONTINUE;
2381 }
2382
2383 #ifdef CONFIG_X86_64
2384 static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
2385 int n)
2386 {
2387 struct desc_struct desc;
2388 int offset;
2389 u16 selector;
2390 u32 base3;
2391
2392 offset = 0x7e00 + n * 16;
2393
2394 selector = GET_SMSTATE(u16, smstate, offset);
2395 rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
2396 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
2397 set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
2398 base3 = GET_SMSTATE(u32, smstate, offset + 12);
2399
2400 ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
2401 return X86EMUL_CONTINUE;
2402 }
2403 #endif
2404
2405 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2406 u64 cr0, u64 cr3, u64 cr4)
2407 {
2408 int bad;
2409 u64 pcid;
2410
2411 /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
2412 pcid = 0;
2413 if (cr4 & X86_CR4_PCIDE) {
2414 pcid = cr3 & 0xfff;
2415 cr3 &= ~0xfff;
2416 }
2417
2418 bad = ctxt->ops->set_cr(ctxt, 3, cr3);
2419 if (bad)
2420 return X86EMUL_UNHANDLEABLE;
2421
2422 /*
2423 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
2424 * Then enable protected mode. However, PCID cannot be enabled
2425 * if EFER.LMA=0, so set it separately.
2426 */
2427 bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2428 if (bad)
2429 return X86EMUL_UNHANDLEABLE;
2430
2431 bad = ctxt->ops->set_cr(ctxt, 0, cr0);
2432 if (bad)
2433 return X86EMUL_UNHANDLEABLE;
2434
2435 if (cr4 & X86_CR4_PCIDE) {
2436 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2437 if (bad)
2438 return X86EMUL_UNHANDLEABLE;
2439 if (pcid) {
2440 bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
2441 if (bad)
2442 return X86EMUL_UNHANDLEABLE;
2443 }
2444
2445 }
2446
2447 return X86EMUL_CONTINUE;
2448 }
2449
2450 static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
2451 const char *smstate)
2452 {
2453 struct desc_struct desc;
2454 struct desc_ptr dt;
2455 u16 selector;
2456 u32 val, cr0, cr3, cr4;
2457 int i;
2458
2459 cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
2460 cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
2461 ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
2462 ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
2463
2464 for (i = 0; i < 8; i++)
2465 *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
2466
2467 val = GET_SMSTATE(u32, smstate, 0x7fcc);
2468
2469 if (ctxt->ops->set_dr(ctxt, 6, val))
2470 return X86EMUL_UNHANDLEABLE;
2471
2472 val = GET_SMSTATE(u32, smstate, 0x7fc8);
2473
2474 if (ctxt->ops->set_dr(ctxt, 7, val))
2475 return X86EMUL_UNHANDLEABLE;
2476
2477 selector = GET_SMSTATE(u32, smstate, 0x7fc4);
2478 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
2479 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
2480 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
2481 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
2482
2483 selector = GET_SMSTATE(u32, smstate, 0x7fc0);
2484 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
2485 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
2486 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
2487 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
2488
2489 dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
2490 dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
2491 ctxt->ops->set_gdt(ctxt, &dt);
2492
2493 dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
2494 dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
2495 ctxt->ops->set_idt(ctxt, &dt);
2496
2497 for (i = 0; i < 6; i++) {
2498 int r = rsm_load_seg_32(ctxt, smstate, i);
2499 if (r != X86EMUL_CONTINUE)
2500 return r;
2501 }
2502
2503 cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
2504
2505 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
2506
2507 return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2508 }
2509
2510 #ifdef CONFIG_X86_64
2511 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
2512 const char *smstate)
2513 {
2514 struct desc_struct desc;
2515 struct desc_ptr dt;
2516 u64 val, cr0, cr3, cr4;
2517 u32 base3;
2518 u16 selector;
2519 int i, r;
2520
2521 for (i = 0; i < 16; i++)
2522 *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
2523
2524 ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
2525 ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
2526
2527 val = GET_SMSTATE(u64, smstate, 0x7f68);
2528
2529 if (ctxt->ops->set_dr(ctxt, 6, val))
2530 return X86EMUL_UNHANDLEABLE;
2531
2532 val = GET_SMSTATE(u64, smstate, 0x7f60);
2533
2534 if (ctxt->ops->set_dr(ctxt, 7, val))
2535 return X86EMUL_UNHANDLEABLE;
2536
2537 cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
2538 cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
2539 cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
2540 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
2541 val = GET_SMSTATE(u64, smstate, 0x7ed0);
2542
2543 if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
2544 return X86EMUL_UNHANDLEABLE;
2545
2546 selector = GET_SMSTATE(u32, smstate, 0x7e90);
2547 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
2548 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
2549 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
2550 base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
2551 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
2552
2553 dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
2554 dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
2555 ctxt->ops->set_idt(ctxt, &dt);
2556
2557 selector = GET_SMSTATE(u32, smstate, 0x7e70);
2558 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
2559 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
2560 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
2561 base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
2562 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
2563
2564 dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
2565 dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
2566 ctxt->ops->set_gdt(ctxt, &dt);
2567
2568 r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2569 if (r != X86EMUL_CONTINUE)
2570 return r;
2571
2572 for (i = 0; i < 6; i++) {
2573 r = rsm_load_seg_64(ctxt, smstate, i);
2574 if (r != X86EMUL_CONTINUE)
2575 return r;
2576 }
2577
2578 return X86EMUL_CONTINUE;
2579 }
2580 #endif
2581
2582 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2583 {
2584 unsigned long cr0, cr4, efer;
2585 char buf[512];
2586 u64 smbase;
2587 int ret;
2588
2589 if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
2590 return emulate_ud(ctxt);
2591
2592 smbase = ctxt->ops->get_smbase(ctxt);
2593
2594 ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
2595 if (ret != X86EMUL_CONTINUE)
2596 return X86EMUL_UNHANDLEABLE;
2597
2598 if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2599 ctxt->ops->set_nmi_mask(ctxt, false);
2600
2601 ctxt->ops->exiting_smm(ctxt);
2602
2603 /*
2604 * Get back to real mode, to prepare a safe state in which to load
2605 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
2606 * supports long mode.
2607 */
2608 if (emulator_has_longmode(ctxt)) {
2609 struct desc_struct cs_desc;
2610
2611 /* Zero CR4.PCIDE before CR0.PG. */
2612 cr4 = ctxt->ops->get_cr(ctxt, 4);
2613 if (cr4 & X86_CR4_PCIDE)
2614 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2615
2616 /* A 32-bit code segment is required to clear EFER.LMA. */
2617 memset(&cs_desc, 0, sizeof(cs_desc));
2618 cs_desc.type = 0xb;
2619 cs_desc.s = cs_desc.g = cs_desc.p = 1;
2620 ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
2621 }
2622
2623 /* For the 64-bit case, this will clear EFER.LMA. */
2624 cr0 = ctxt->ops->get_cr(ctxt, 0);
2625 if (cr0 & X86_CR0_PE)
2626 ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2627
2628 if (emulator_has_longmode(ctxt)) {
2629 /* Clear CR4.PAE before clearing EFER.LME. */
2630 cr4 = ctxt->ops->get_cr(ctxt, 4);
2631 if (cr4 & X86_CR4_PAE)
2632 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2633
2634 /* And finally go back to 32-bit mode. */
2635 efer = 0;
2636 ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
2637 }
2638
2639 /*
2640 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
2641 * state (e.g. enter guest mode) before loading state from the SMM
2642 * state-save area.
2643 */
2644 if (ctxt->ops->leave_smm(ctxt, buf))
2645 goto emulate_shutdown;
2646
2647 #ifdef CONFIG_X86_64
2648 if (emulator_has_longmode(ctxt))
2649 ret = rsm_load_state_64(ctxt, buf);
2650 else
2651 #endif
2652 ret = rsm_load_state_32(ctxt, buf);
2653
2654 if (ret != X86EMUL_CONTINUE)
2655 goto emulate_shutdown;
2656
2657 /*
2658 * Note, the ctxt->ops callbacks are responsible for handling side
2659 * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
2660 * runtime updates, etc... If that changes, e.g. this flow is moved
2661 * out of the emulator to make it look more like enter_smm(), then
2662 * those side effects need to be explicitly handled for both success
2663 * and shutdown.
2664 */
2665 return emulator_recalc_and_set_mode(ctxt);
2666
2667 emulate_shutdown:
2668 ctxt->ops->triple_fault(ctxt);
2669 return X86EMUL_CONTINUE;
2670 }
2671
2672 static void
2673 setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
2674 {
2675 cs->l = 0; /* will be adjusted later */
2676 set_desc_base(cs, 0); /* flat segment */
2677 cs->g = 1; /* 4kb granularity */
2678 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2679 cs->type = 0x0b; /* Read, Execute, Accessed */
2680 cs->s = 1;
2681 cs->dpl = 0; /* will be adjusted later */
2682 cs->p = 1;
2683 cs->d = 1;
2684 cs->avl = 0;
2685
2686 set_desc_base(ss, 0); /* flat segment */
2687 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2688 ss->g = 1; /* 4kb granularity */
2689 ss->s = 1;
2690 ss->type = 0x03; /* Read/Write, Accessed */
2691 ss->d = 1; /* 32bit stack segment */
2692 ss->dpl = 0;
2693 ss->p = 1;
2694 ss->l = 0;
2695 ss->avl = 0;
2696 }
2697
2698 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2699 {
2700 u32 eax, ebx, ecx, edx;
2701
2702 eax = ecx = 0;
2703 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2704 return is_guest_vendor_intel(ebx, ecx, edx);
2705 }
2706
2707 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2708 {
2709 const struct x86_emulate_ops *ops = ctxt->ops;
2710 u32 eax, ebx, ecx, edx;
2711
2712 /*
2713 * SYSCALL is always enabled in long mode, so the check only becomes
2714 * vendor specific (via CPUID) when other modes are active...
2715 */
2716 if (ctxt->mode == X86EMUL_MODE_PROT64)
2717 return true;
2718
2719 eax = 0x00000000;
2720 ecx = 0x00000000;
2721 ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2722 /*
2723 * Remark: Intel CPUs only support "syscall" in 64-bit long mode, so a
2724 * 32-bit compat application running under a 64-bit guest will #UD.
2725 * While this behaviour could be fixed by emulating the AMD response,
2726 * AMD CPUs cannot be made to behave like Intel ones.
2727 */
2728 if (is_guest_vendor_intel(ebx, ecx, edx))
2729 return false;
2730
2731 if (is_guest_vendor_amd(ebx, ecx, edx) ||
2732 is_guest_vendor_hygon(ebx, ecx, edx))
2733 return true;
2734
2735 /*
2736 * default: (not Intel, not AMD, not Hygon), apply Intel's
2737 * stricter rules...
2738 */
2739 return false;
2740 }
2741
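/*
 * SYSCALL derives the new selectors from MSR_STAR[63:32]: CS is that
 * value with the RPL bits masked off and SS is CS + 8.  In long mode
 * the return RIP goes to RCX and RFLAGS to R11, the target RIP comes
 * from LSTAR (or CSTAR for compat mode) and RFLAGS is masked with
 * MSR_SYSCALL_MASK; in legacy mode the target EIP is the low half of
 * STAR and only VM/IF are cleared.
 */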
2742 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2743 {
2744 const struct x86_emulate_ops *ops = ctxt->ops;
2745 struct desc_struct cs, ss;
2746 u64 msr_data;
2747 u16 cs_sel, ss_sel;
2748 u64 efer = 0;
2749
2750 /* syscall is not available in real mode */
2751 if (ctxt->mode == X86EMUL_MODE_REAL ||
2752 ctxt->mode == X86EMUL_MODE_VM86)
2753 return emulate_ud(ctxt);
2754
2755 if (!(em_syscall_is_enabled(ctxt)))
2756 return emulate_ud(ctxt);
2757
2758 ops->get_msr(ctxt, MSR_EFER, &efer);
2759 if (!(efer & EFER_SCE))
2760 return emulate_ud(ctxt);
2761
2762 setup_syscalls_segments(&cs, &ss);
2763 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2764 msr_data >>= 32;
2765 cs_sel = (u16)(msr_data & 0xfffc);
2766 ss_sel = (u16)(msr_data + 8);
2767
2768 if (efer & EFER_LMA) {
2769 cs.d = 0;
2770 cs.l = 1;
2771 }
2772 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2773 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2774
2775 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2776 if (efer & EFER_LMA) {
2777 #ifdef CONFIG_X86_64
2778 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2779
2780 ops->get_msr(ctxt,
2781 ctxt->mode == X86EMUL_MODE_PROT64 ?
2782 MSR_LSTAR : MSR_CSTAR, &msr_data);
2783 ctxt->_eip = msr_data;
2784
2785 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2786 ctxt->eflags &= ~msr_data;
2787 ctxt->eflags |= X86_EFLAGS_FIXED;
2788 #endif
2789 } else {
2790 /* legacy mode */
2791 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2792 ctxt->_eip = (u32)msr_data;
2793
2794 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2795 }
2796
2797 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2798 return X86EMUL_CONTINUE;
2799 }
2800
2801 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2802 {
2803 const struct x86_emulate_ops *ops = ctxt->ops;
2804 struct desc_struct cs, ss;
2805 u64 msr_data;
2806 u16 cs_sel, ss_sel;
2807 u64 efer = 0;
2808
2809 ops->get_msr(ctxt, MSR_EFER, &efer);
2810 /* inject #GP if in real mode */
2811 if (ctxt->mode == X86EMUL_MODE_REAL)
2812 return emulate_gp(ctxt, 0);
2813
2814 /*
2815 * Not recognized on AMD in compat mode (but is recognized in legacy
2816 * mode).
2817 */
2818 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2819 && !vendor_intel(ctxt))
2820 return emulate_ud(ctxt);
2821
2822 /* sysenter/sysexit have not been tested in 64bit mode. */
2823 if (ctxt->mode == X86EMUL_MODE_PROT64)
2824 return X86EMUL_UNHANDLEABLE;
2825
2826 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2827 if ((msr_data & 0xfffc) == 0x0)
2828 return emulate_gp(ctxt, 0);
2829
2830 setup_syscalls_segments(&cs, &ss);
2831 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2832 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2833 ss_sel = cs_sel + 8;
2834 if (efer & EFER_LMA) {
2835 cs.d = 0;
2836 cs.l = 1;
2837 }
2838
2839 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2840 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2841
2842 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2843 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2844
2845 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2846 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2847 (u32)msr_data;
2848 if (efer & EFER_LMA)
2849 ctxt->mode = X86EMUL_MODE_PROT64;
2850
2851 return X86EMUL_CONTINUE;
2852 }
2853
2854 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2855 {
2856 const struct x86_emulate_ops *ops = ctxt->ops;
2857 struct desc_struct cs, ss;
2858 u64 msr_data, rcx, rdx;
2859 int usermode;
2860 u16 cs_sel = 0, ss_sel = 0;
2861
2862 /* inject #GP if in real mode or Virtual 8086 mode */
2863 if (ctxt->mode == X86EMUL_MODE_REAL ||
2864 ctxt->mode == X86EMUL_MODE_VM86)
2865 return emulate_gp(ctxt, 0);
2866
2867 setup_syscalls_segments(&cs, &ss);
2868
2869 if ((ctxt->rex_prefix & 0x8) != 0x0)
2870 usermode = X86EMUL_MODE_PROT64;
2871 else
2872 usermode = X86EMUL_MODE_PROT32;
2873
2874 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2875 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2876
2877 cs.dpl = 3;
2878 ss.dpl = 3;
2879 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2880 switch (usermode) {
2881 case X86EMUL_MODE_PROT32:
2882 cs_sel = (u16)(msr_data + 16);
2883 if ((msr_data & 0xfffc) == 0x0)
2884 return emulate_gp(ctxt, 0);
2885 ss_sel = (u16)(msr_data + 24);
2886 rcx = (u32)rcx;
2887 rdx = (u32)rdx;
2888 break;
2889 case X86EMUL_MODE_PROT64:
2890 cs_sel = (u16)(msr_data + 32);
2891 if (msr_data == 0x0)
2892 return emulate_gp(ctxt, 0);
2893 ss_sel = cs_sel + 8;
2894 cs.d = 0;
2895 cs.l = 1;
2896 if (emul_is_noncanonical_address(rcx, ctxt) ||
2897 emul_is_noncanonical_address(rdx, ctxt))
2898 return emulate_gp(ctxt, 0);
2899 break;
2900 }
2901 cs_sel |= SEGMENT_RPL_MASK;
2902 ss_sel |= SEGMENT_RPL_MASK;
2903
2904 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2905 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2906
2907 ctxt->_eip = rdx;
2908 ctxt->mode = usermode;
2909 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2910
2911 return X86EMUL_CONTINUE;
2912 }
2913
2914 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2915 {
2916 int iopl;
2917 if (ctxt->mode == X86EMUL_MODE_REAL)
2918 return false;
2919 if (ctxt->mode == X86EMUL_MODE_VM86)
2920 return true;
2921 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2922 return ctxt->ops->cpl(ctxt) > iopl;
2923 }
2924
2925 #define VMWARE_PORT_VMPORT (0x5658)
2926 #define VMWARE_PORT_VMRPC (0x5659)
2927
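/*
 * Check the TSS I/O permission bitmap: the 16-bit bitmap offset lives
 * at byte 102 of the TSS, and each port owns one bit.  The helper reads
 * the 16-bit word containing the first port's bit and requires that all
 * 'len' bits starting at (port & 7) be clear; e.g. port 0x3f8 is bit 0
 * of the byte at bitmap_offset + 127.
 */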
2928 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2929 u16 port, u16 len)
2930 {
2931 const struct x86_emulate_ops *ops = ctxt->ops;
2932 struct desc_struct tr_seg;
2933 u32 base3;
2934 int r;
2935 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2936 unsigned mask = (1 << len) - 1;
2937 unsigned long base;
2938
2939 /*
2940 * VMware allows access to these ports even if denied
2941 * by TSS I/O permission bitmap. Mimic behavior.
2942 */
2943 if (enable_vmware_backdoor &&
2944 ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2945 return true;
2946
2947 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2948 if (!tr_seg.p)
2949 return false;
2950 if (desc_limit_scaled(&tr_seg) < 103)
2951 return false;
2952 base = get_desc_base(&tr_seg);
2953 #ifdef CONFIG_X86_64
2954 base |= ((u64)base3) << 32;
2955 #endif
2956 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2957 if (r != X86EMUL_CONTINUE)
2958 return false;
2959 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2960 return false;
2961 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2962 if (r != X86EMUL_CONTINUE)
2963 return false;
2964 if ((perm >> bit_idx) & mask)
2965 return false;
2966 return true;
2967 }
2968
2969 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2970 u16 port, u16 len)
2971 {
2972 if (ctxt->perm_ok)
2973 return true;
2974
2975 if (emulator_bad_iopl(ctxt))
2976 if (!emulator_io_port_access_allowed(ctxt, port, len))
2977 return false;
2978
2979 ctxt->perm_ok = true;
2980
2981 return true;
2982 }
2983
2984 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2985 {
2986 /*
2987 * Intel CPUs mask the counter and pointers in quite a strange
2988 * manner when ECX is zero, due to REP-string optimizations.
2989 */
2990 #ifdef CONFIG_X86_64
2991 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2992 return;
2993
2994 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2995
2996 switch (ctxt->b) {
2997 case 0xa4: /* movsb */
2998 case 0xa5: /* movsd/w */
2999 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
3000 fallthrough;
3001 case 0xaa: /* stosb */
3002 case 0xab: /* stosd/w */
3003 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
3004 }
3005 #endif
3006 }
3007
3008 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
3009 struct tss_segment_16 *tss)
3010 {
3011 tss->ip = ctxt->_eip;
3012 tss->flag = ctxt->eflags;
3013 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
3014 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
3015 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
3016 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
3017 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
3018 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
3019 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
3020 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
3021
3022 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
3023 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3024 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
3025 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
3026 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
3027 }
3028
3029 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
3030 struct tss_segment_16 *tss)
3031 {
3032 int ret;
3033 u8 cpl;
3034
3035 ctxt->_eip = tss->ip;
3036 ctxt->eflags = tss->flag | 2;
3037 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
3038 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
3039 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
3040 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
3041 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
3042 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
3043 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
3044 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
3045
3046 /*
3047 * SDM says that segment selectors are loaded before segment
3048 * descriptors
3049 */
3050 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
3051 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3052 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3053 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3054 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3055
3056 cpl = tss->cs & 3;
3057
3058 /*
3059 * Now load the segment descriptors. If a fault happens at this stage,
3060 * it is handled in the context of the new task.
3061 */
3062 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
3063 X86_TRANSFER_TASK_SWITCH, NULL);
3064 if (ret != X86EMUL_CONTINUE)
3065 return ret;
3066 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3067 X86_TRANSFER_TASK_SWITCH, NULL);
3068 if (ret != X86EMUL_CONTINUE)
3069 return ret;
3070 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3071 X86_TRANSFER_TASK_SWITCH, NULL);
3072 if (ret != X86EMUL_CONTINUE)
3073 return ret;
3074 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3075 X86_TRANSFER_TASK_SWITCH, NULL);
3076 if (ret != X86EMUL_CONTINUE)
3077 return ret;
3078 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3079 X86_TRANSFER_TASK_SWITCH, NULL);
3080 if (ret != X86EMUL_CONTINUE)
3081 return ret;
3082
3083 return X86EMUL_CONTINUE;
3084 }
3085
3086 static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
3087 ulong old_tss_base, struct desc_struct *new_desc)
3088 {
3089 struct tss_segment_16 tss_seg;
3090 int ret;
3091 u32 new_tss_base = get_desc_base(new_desc);
3092
3093 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3094 if (ret != X86EMUL_CONTINUE)
3095 return ret;
3096
3097 save_state_to_tss16(ctxt, &tss_seg);
3098
3099 ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3100 if (ret != X86EMUL_CONTINUE)
3101 return ret;
3102
3103 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
3104 if (ret != X86EMUL_CONTINUE)
3105 return ret;
3106
3107 if (old_tss_sel != 0xffff) {
3108 tss_seg.prev_task_link = old_tss_sel;
3109
3110 ret = linear_write_system(ctxt, new_tss_base,
3111 &tss_seg.prev_task_link,
3112 sizeof(tss_seg.prev_task_link));
3113 if (ret != X86EMUL_CONTINUE)
3114 return ret;
3115 }
3116
3117 return load_state_from_tss16(ctxt, &tss_seg);
3118 }
3119
3120 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
3121 struct tss_segment_32 *tss)
3122 {
3123 /* CR3 and the LDT selector are intentionally not saved */
3124 tss->eip = ctxt->_eip;
3125 tss->eflags = ctxt->eflags;
3126 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
3127 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
3128 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
3129 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
3130 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
3131 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
3132 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
3133 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3134
3135 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
3136 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3137 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
3138 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
3139 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
3140 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3141 }
3142
3143 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
3144 struct tss_segment_32 *tss)
3145 {
3146 int ret;
3147 u8 cpl;
3148
3149 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3150 return emulate_gp(ctxt, 0);
3151 ctxt->_eip = tss->eip;
3152 ctxt->eflags = tss->eflags | 2;
3153
3154 /* General purpose registers */
3155 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
3156 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
3157 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
3158 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
3159 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
3160 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
3161 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
3162 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3163
3164 /*
3165 * SDM says that segment selectors are loaded before segment
3166 * descriptors. This is important because CPL checks will
3167 * use CS.RPL.
3168 */
3169 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
3170 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3171 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3172 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3173 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3174 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
3175 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3176
3177 /*
3178 * If we're switching between Protected Mode and VM86, we need to make
3179 * sure to update the mode before loading the segment descriptors so
3180 * that the selectors are interpreted correctly.
3181 */
3182 if (ctxt->eflags & X86_EFLAGS_VM) {
3183 ctxt->mode = X86EMUL_MODE_VM86;
3184 cpl = 3;
3185 } else {
3186 ctxt->mode = X86EMUL_MODE_PROT32;
3187 cpl = tss->cs & 3;
3188 }
3189
3190 /*
3191 * Now load the segment descriptors. If a fault happens at this stage,
3192 * it is handled in the context of the new task.
3193 */
3194 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3195 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3196 if (ret != X86EMUL_CONTINUE)
3197 return ret;
3198 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3199 X86_TRANSFER_TASK_SWITCH, NULL);
3200 if (ret != X86EMUL_CONTINUE)
3201 return ret;
3202 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3203 X86_TRANSFER_TASK_SWITCH, NULL);
3204 if (ret != X86EMUL_CONTINUE)
3205 return ret;
3206 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3207 X86_TRANSFER_TASK_SWITCH, NULL);
3208 if (ret != X86EMUL_CONTINUE)
3209 return ret;
3210 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3211 X86_TRANSFER_TASK_SWITCH, NULL);
3212 if (ret != X86EMUL_CONTINUE)
3213 return ret;
3214 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3215 X86_TRANSFER_TASK_SWITCH, NULL);
3216 if (ret != X86EMUL_CONTINUE)
3217 return ret;
3218 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3219 X86_TRANSFER_TASK_SWITCH, NULL);
3220
3221 return ret;
3222 }
3223
3224 static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
3225 ulong old_tss_base, struct desc_struct *new_desc)
3226 {
3227 struct tss_segment_32 tss_seg;
3228 int ret;
3229 u32 new_tss_base = get_desc_base(new_desc);
3230 u32 eip_offset = offsetof(struct tss_segment_32, eip);
3231 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3232
3233 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3234 if (ret != X86EMUL_CONTINUE)
3235 return ret;
3236
3237 save_state_to_tss32(ctxt, &tss_seg);
3238
3239 /* Only GP registers and segment selectors are saved */
3240 ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
3241 ldt_sel_offset - eip_offset);
3242 if (ret != X86EMUL_CONTINUE)
3243 return ret;
3244
3245 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
3246 if (ret != X86EMUL_CONTINUE)
3247 return ret;
3248
3249 if (old_tss_sel != 0xffff) {
3250 tss_seg.prev_task_link = old_tss_sel;
3251
3252 ret = linear_write_system(ctxt, new_tss_base,
3253 &tss_seg.prev_task_link,
3254 sizeof(tss_seg.prev_task_link));
3255 if (ret != X86EMUL_CONTINUE)
3256 return ret;
3257 }
3258
3259 return load_state_from_tss32(ctxt, &tss_seg);
3260 }
3261
3262 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3263 u16 tss_selector, int idt_index, int reason,
3264 bool has_error_code, u32 error_code)
3265 {
3266 const struct x86_emulate_ops *ops = ctxt->ops;
3267 struct desc_struct curr_tss_desc, next_tss_desc;
3268 int ret;
3269 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3270 ulong old_tss_base =
3271 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3272 u32 desc_limit;
3273 ulong desc_addr, dr7;
3274
3275 /* FIXME: old_tss_base == ~0 ? */
3276
3277 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3278 if (ret != X86EMUL_CONTINUE)
3279 return ret;
3280 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3281 if (ret != X86EMUL_CONTINUE)
3282 return ret;
3283
3284 /* FIXME: check that next_tss_desc is tss */
3285
3286 /*
3287 * Check privileges. The three cases are task switch caused by...
3288 *
3289 * 1. jmp/call/int to task gate: Check against DPL of the task gate
3290 * 2. Exception/IRQ/iret: No check is performed
3291 * 3. jmp/call to TSS/task-gate: No check is performed since the
3292 * hardware checks it before exiting.
3293 */
3294 if (reason == TASK_SWITCH_GATE) {
3295 if (idt_index != -1) {
3296 /* Software interrupts */
3297 struct desc_struct task_gate_desc;
3298 int dpl;
3299
3300 ret = read_interrupt_descriptor(ctxt, idt_index,
3301 &task_gate_desc);
3302 if (ret != X86EMUL_CONTINUE)
3303 return ret;
3304
3305 dpl = task_gate_desc.dpl;
3306 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
3307 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
3308 }
3309 }
3310
3311 desc_limit = desc_limit_scaled(&next_tss_desc);
3312 if (!next_tss_desc.p ||
3313 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
3314 desc_limit < 0x2b)) {
3315 return emulate_ts(ctxt, tss_selector & 0xfffc);
3316 }
3317
3318 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3319 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3320 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3321 }
3322
3323 if (reason == TASK_SWITCH_IRET)
3324 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
3325
3326 /* Set the back link to the prev task only if the NT bit is set in eflags;
3327 note that old_tss_sel is not used after this point */
3328 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3329 old_tss_sel = 0xffff;
3330
3331 if (next_tss_desc.type & 8)
3332 ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
3333 else
3334 ret = task_switch_16(ctxt, old_tss_sel,
3335 old_tss_base, &next_tss_desc);
3336 if (ret != X86EMUL_CONTINUE)
3337 return ret;
3338
3339 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3340 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3341
3342 if (reason != TASK_SWITCH_IRET) {
3343 next_tss_desc.type |= (1 << 1); /* set busy flag */
3344 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3345 }
3346
3347 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3348 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3349
3350 if (has_error_code) {
3351 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3352 ctxt->lock_prefix = 0;
3353 ctxt->src.val = (unsigned long) error_code;
3354 ret = em_push(ctxt);
3355 }
3356
3357 ops->get_dr(ctxt, 7, &dr7);
3358 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3359
3360 return ret;
3361 }
3362
3363 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3364 u16 tss_selector, int idt_index, int reason,
3365 bool has_error_code, u32 error_code)
3366 {
3367 int rc;
3368
3369 invalidate_registers(ctxt);
3370 ctxt->_eip = ctxt->eip;
3371 ctxt->dst.type = OP_NONE;
3372
3373 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3374 has_error_code, error_code);
3375
3376 if (rc == X86EMUL_CONTINUE) {
3377 ctxt->eip = ctxt->_eip;
3378 writeback_registers(ctxt);
3379 }
3380
3381 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3382 }
3383
3384 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3385 struct operand *op)
3386 {
3387 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3388
3389 register_address_increment(ctxt, reg, df * op->bytes);
3390 op->addr.mem.ea = register_address(ctxt, reg);
3391 }
3392
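/*
 * DAS adjusts AL after a packed-BCD subtraction: subtract 6 if the low
 * nibble is greater than 9 or AF is set, then subtract 0x60 if the
 * original AL was above 0x99 or CF was set.  For example, 0x52 - 0x06
 * leaves AL = 0x4c; DAS turns that into the BCD result 0x46.
 */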
3393 static int em_das(struct x86_emulate_ctxt *ctxt)
3394 {
3395 u8 al, old_al;
3396 bool af, cf, old_cf;
3397
3398 cf = ctxt->eflags & X86_EFLAGS_CF;
3399 al = ctxt->dst.val;
3400
3401 old_al = al;
3402 old_cf = cf;
3403 cf = false;
3404 af = ctxt->eflags & X86_EFLAGS_AF;
3405 if ((al & 0x0f) > 9 || af) {
3406 al -= 6;
3407 cf = old_cf | (al >= 250);
3408 af = true;
3409 } else {
3410 af = false;
3411 }
3412 if (old_al > 0x99 || old_cf) {
3413 al -= 0x60;
3414 cf = true;
3415 }
3416
3417 ctxt->dst.val = al;
3418 /* Set PF, ZF, SF */
3419 ctxt->src.type = OP_IMM;
3420 ctxt->src.val = 0;
3421 ctxt->src.bytes = 1;
3422 fastop(ctxt, em_or);
3423 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3424 if (cf)
3425 ctxt->eflags |= X86_EFLAGS_CF;
3426 if (af)
3427 ctxt->eflags |= X86_EFLAGS_AF;
3428 return X86EMUL_CONTINUE;
3429 }
3430
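/*
 * AAM splits AL by the immediate base (10 unless overridden):
 * AH = AL / imm, AL = AL % imm, with #DE on a zero divisor.  E.g. with
 * AL = 53 and the default base, AH becomes 5 and AL becomes 3.
 */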
3431 static int em_aam(struct x86_emulate_ctxt *ctxt)
3432 {
3433 u8 al, ah;
3434
3435 if (ctxt->src.val == 0)
3436 return emulate_de(ctxt);
3437
3438 al = ctxt->dst.val & 0xff;
3439 ah = al / ctxt->src.val;
3440 al %= ctxt->src.val;
3441
3442 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3443
3444 /* Set PF, ZF, SF */
3445 ctxt->src.type = OP_IMM;
3446 ctxt->src.val = 0;
3447 ctxt->src.bytes = 1;
3448 fastop(ctxt, em_or);
3449
3450 return X86EMUL_CONTINUE;
3451 }
3452
3453 static int em_aad(struct x86_emulate_ctxt *ctxt)
3454 {
3455 u8 al = ctxt->dst.val & 0xff;
3456 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3457
3458 al = (al + (ah * ctxt->src.val)) & 0xff;
3459
3460 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3461
3462 /* Set PF, ZF, SF */
3463 ctxt->src.type = OP_IMM;
3464 ctxt->src.val = 0;
3465 ctxt->src.bytes = 1;
3466 fastop(ctxt, em_or);
3467
3468 return X86EMUL_CONTINUE;
3469 }
3470
3471 static int em_call(struct x86_emulate_ctxt *ctxt)
3472 {
3473 int rc;
3474 long rel = ctxt->src.val;
3475
3476 ctxt->src.val = (unsigned long)ctxt->_eip;
3477 rc = jmp_rel(ctxt, rel);
3478 if (rc != X86EMUL_CONTINUE)
3479 return rc;
3480 return em_push(ctxt);
3481 }
3482
3483 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3484 {
3485 u16 sel, old_cs;
3486 ulong old_eip;
3487 int rc;
3488 struct desc_struct old_desc, new_desc;
3489 const struct x86_emulate_ops *ops = ctxt->ops;
3490 int cpl = ctxt->ops->cpl(ctxt);
3491 enum x86emul_mode prev_mode = ctxt->mode;
3492
3493 old_eip = ctxt->_eip;
3494 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3495
3496 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3497 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3498 X86_TRANSFER_CALL_JMP, &new_desc);
3499 if (rc != X86EMUL_CONTINUE)
3500 return rc;
3501
3502 rc = assign_eip_far(ctxt, ctxt->src.val);
3503 if (rc != X86EMUL_CONTINUE)
3504 goto fail;
3505
3506 ctxt->src.val = old_cs;
3507 rc = em_push(ctxt);
3508 if (rc != X86EMUL_CONTINUE)
3509 goto fail;
3510
3511 ctxt->src.val = old_eip;
3512 rc = em_push(ctxt);
3513 /* If we failed, we tainted the memory, but at the very least we should
3514 restore cs */
3515 if (rc != X86EMUL_CONTINUE) {
3516 pr_warn_once("faulting far call emulation tainted memory\n");
3517 goto fail;
3518 }
3519 return rc;
3520 fail:
3521 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3522 ctxt->mode = prev_mode;
3523 return rc;
3524
3525 }
3526
3527 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3528 {
3529 int rc;
3530 unsigned long eip;
3531
3532 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3533 if (rc != X86EMUL_CONTINUE)
3534 return rc;
3535 rc = assign_eip_near(ctxt, eip);
3536 if (rc != X86EMUL_CONTINUE)
3537 return rc;
3538 rsp_increment(ctxt, ctxt->src.val);
3539 return X86EMUL_CONTINUE;
3540 }
3541
3542 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3543 {
3544 /* Write back the register source. */
3545 ctxt->src.val = ctxt->dst.val;
3546 write_register_operand(&ctxt->src);
3547
3548 /* Write back the memory destination with implicit LOCK prefix. */
3549 ctxt->dst.val = ctxt->src.orig_val;
3550 ctxt->lock_prefix = 1;
3551 return X86EMUL_CONTINUE;
3552 }
3553
3554 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3555 {
3556 ctxt->dst.val = ctxt->src2.val;
3557 return fastop(ctxt, em_imul);
3558 }
3559
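/*
 * CWD/CDQ/CQO sign-extend the accumulator into rDX.  The expression
 * below isolates the sign bit of the source and turns it into an
 * all-zeroes or all-ones fill: e.g. for a 16-bit AX of 0x8000 the shift
 * yields 1, so ~(1 - 1) = ~0 and DX becomes 0xffff.
 */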
3560 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3561 {
3562 ctxt->dst.type = OP_REG;
3563 ctxt->dst.bytes = ctxt->src.bytes;
3564 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3565 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3566
3567 return X86EMUL_CONTINUE;
3568 }
3569
3570 static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3571 {
3572 u64 tsc_aux = 0;
3573
3574 if (!ctxt->ops->guest_has_rdpid(ctxt))
3575 return emulate_ud(ctxt);
3576
3577 ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
3578 ctxt->dst.val = tsc_aux;
3579 return X86EMUL_CONTINUE;
3580 }
3581
3582 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3583 {
3584 u64 tsc = 0;
3585
3586 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3587 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3588 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3589 return X86EMUL_CONTINUE;
3590 }
3591
3592 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3593 {
3594 u64 pmc;
3595
3596 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3597 return emulate_gp(ctxt, 0);
3598 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3599 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3600 return X86EMUL_CONTINUE;
3601 }
3602
3603 static int em_mov(struct x86_emulate_ctxt *ctxt)
3604 {
3605 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3606 return X86EMUL_CONTINUE;
3607 }
3608
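/*
 * MOVBE moves data with the byte order reversed; e.g. a 32-bit load of
 * 0x11223344 stores 0x44332211 in the destination.  The 16-bit form
 * must preserve the upper word of a register destination, which is why
 * the handler below swaps via a temporary instead of casting.
 */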
3609 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3610 {
3611 u16 tmp;
3612
3613 if (!ctxt->ops->guest_has_movbe(ctxt))
3614 return emulate_ud(ctxt);
3615
3616 switch (ctxt->op_bytes) {
3617 case 2:
3618 /*
3619 * From MOVBE definition: "...When the operand size is 16 bits,
3620 * the upper word of the destination register remains unchanged
3621 * ..."
3622 *
3623 * Casting either ->valptr or ->val to u16 breaks strict aliasing
3624 * rules, so we have to do the operation almost by hand.
3625 */
3626 tmp = (u16)ctxt->src.val;
3627 ctxt->dst.val &= ~0xffffUL;
3628 ctxt->dst.val |= (unsigned long)swab16(tmp);
3629 break;
3630 case 4:
3631 ctxt->dst.val = swab32((u32)ctxt->src.val);
3632 break;
3633 case 8:
3634 ctxt->dst.val = swab64(ctxt->src.val);
3635 break;
3636 default:
3637 BUG();
3638 }
3639 return X86EMUL_CONTINUE;
3640 }
3641
3642 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3643 {
3644 int cr_num = ctxt->modrm_reg;
3645 int r;
3646
3647 if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
3648 return emulate_gp(ctxt, 0);
3649
3650 /* Disable writeback. */
3651 ctxt->dst.type = OP_NONE;
3652
3653 if (cr_num == 0) {
3654 /*
3655 * CR0 write might have updated CR0.PE and/or CR0.PG
3656 * which can affect the cpu's execution mode.
3657 */
3658 r = emulator_recalc_and_set_mode(ctxt);
3659 if (r != X86EMUL_CONTINUE)
3660 return r;
3661 }
3662
3663 return X86EMUL_CONTINUE;
3664 }
3665
3666 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3667 {
3668 unsigned long val;
3669
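/*
 * In 64-bit mode the full 64-bit source value is used; otherwise only the
 * low 32 bits of the source register exist, so the value is truncated.
 */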
3670 if (ctxt->mode == X86EMUL_MODE_PROT64)
3671 val = ctxt->src.val & ~0ULL;
3672 else
3673 val = ctxt->src.val & ~0U;
3674
3675 /* #UD condition is already handled. */
3676 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3677 return emulate_gp(ctxt, 0);
3678
3679 /* Disable writeback. */
3680 ctxt->dst.type = OP_NONE;
3681 return X86EMUL_CONTINUE;
3682 }
3683
3684 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3685 {
3686 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3687 u64 msr_data;
3688 int r;
3689
3690 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3691 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3692 r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
3693
3694 if (r == X86EMUL_PROPAGATE_FAULT)
3695 return emulate_gp(ctxt, 0);
3696
3697 return r;
3698 }
3699
3700 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3701 {
3702 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3703 u64 msr_data;
3704 int r;
3705
3706 r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
3707
3708 if (r == X86EMUL_PROPAGATE_FAULT)
3709 return emulate_gp(ctxt, 0);
3710
3711 if (r == X86EMUL_CONTINUE) {
3712 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3713 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3714 }
3715 return r;
3716 }
3717
3718 static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3719 {
3720 if (segment > VCPU_SREG_GS &&
3721 (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3722 ctxt->ops->cpl(ctxt) > 0)
3723 return emulate_gp(ctxt, 0);
3724
3725 ctxt->dst.val = get_segment_selector(ctxt, segment);
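/*
 * A memory destination always stores just the 16-bit selector; a register
 * destination keeps the full operand size (the selector is zero-extended).
 */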
3726 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3727 ctxt->dst.bytes = 2;
3728 return X86EMUL_CONTINUE;
3729 }
3730
3731 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3732 {
3733 if (ctxt->modrm_reg > VCPU_SREG_GS)
3734 return emulate_ud(ctxt);
3735
3736 return em_store_sreg(ctxt, ctxt->modrm_reg);
3737 }
3738
3739 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3740 {
3741 u16 sel = ctxt->src.val;
3742
3743 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3744 return emulate_ud(ctxt);
3745
3746 if (ctxt->modrm_reg == VCPU_SREG_SS)
3747 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3748
3749 /* Disable writeback. */
3750 ctxt->dst.type = OP_NONE;
3751 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3752 }
3753
3754 static int em_sldt(struct x86_emulate_ctxt *ctxt)
3755 {
3756 return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3757 }
3758
3759 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3760 {
3761 u16 sel = ctxt->src.val;
3762
3763 /* Disable writeback. */
3764 ctxt->dst.type = OP_NONE;
3765 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3766 }
3767
3768 static int em_str(struct x86_emulate_ctxt *ctxt)
3769 {
3770 return em_store_sreg(ctxt, VCPU_SREG_TR);
3771 }
3772
3773 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3774 {
3775 u16 sel = ctxt->src.val;
3776
3777 /* Disable writeback. */
3778 ctxt->dst.type = OP_NONE;
3779 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3780 }
3781
3782 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3783 {
3784 int rc;
3785 ulong linear;
3786
3787 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3788 if (rc == X86EMUL_CONTINUE)
3789 ctxt->ops->invlpg(ctxt, linear);
3790 /* Disable writeback. */
3791 ctxt->dst.type = OP_NONE;
3792 return X86EMUL_CONTINUE;
3793 }
3794
3795 static int em_clts(struct x86_emulate_ctxt *ctxt)
3796 {
3797 ulong cr0;
3798
3799 cr0 = ctxt->ops->get_cr(ctxt, 0);
3800 cr0 &= ~X86_CR0_TS;
3801 ctxt->ops->set_cr(ctxt, 0, cr0);
3802 return X86EMUL_CONTINUE;
3803 }
3804
3805 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3806 {
3807 int rc = ctxt->ops->fix_hypercall(ctxt);
3808
3809 if (rc != X86EMUL_CONTINUE)
3810 return rc;
3811
3812 /* Let the processor re-execute the fixed hypercall */
3813 ctxt->_eip = ctxt->eip;
3814 /* Disable writeback. */
3815 ctxt->dst.type = OP_NONE;
3816 return X86EMUL_CONTINUE;
3817 }
3818
3819 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3820 void (*get)(struct x86_emulate_ctxt *ctxt,
3821 struct desc_ptr *ptr))
3822 {
3823 struct desc_ptr desc_ptr;
3824
3825 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3826 ctxt->ops->cpl(ctxt) > 0)
3827 return emulate_gp(ctxt, 0);
3828
3829 if (ctxt->mode == X86EMUL_MODE_PROT64)
3830 ctxt->op_bytes = 8;
3831 get(ctxt, &desc_ptr);
3832 if (ctxt->op_bytes == 2) {
3833 ctxt->op_bytes = 4;
3834 desc_ptr.address &= 0x00ffffff;
3835 }
3836 /* Disable writeback. */
3837 ctxt->dst.type = OP_NONE;
3838 return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3839 &desc_ptr, 2 + ctxt->op_bytes);
3840 }
3841
3842 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3843 {
3844 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3845 }
3846
3847 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3848 {
3849 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3850 }
3851
3852 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3853 {
3854 struct desc_ptr desc_ptr;
3855 int rc;
3856
3857 if (ctxt->mode == X86EMUL_MODE_PROT64)
3858 ctxt->op_bytes = 8;
3859 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3860 &desc_ptr.size, &desc_ptr.address,
3861 ctxt->op_bytes);
3862 if (rc != X86EMUL_CONTINUE)
3863 return rc;
3864 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3865 emul_is_noncanonical_address(desc_ptr.address, ctxt))
3866 return emulate_gp(ctxt, 0);
3867 if (lgdt)
3868 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3869 else
3870 ctxt->ops->set_idt(ctxt, &desc_ptr);
3871 /* Disable writeback. */
3872 ctxt->dst.type = OP_NONE;
3873 return X86EMUL_CONTINUE;
3874 }
3875
3876 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3877 {
3878 return em_lgdt_lidt(ctxt, true);
3879 }
3880
3881 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3882 {
3883 return em_lgdt_lidt(ctxt, false);
3884 }
3885
3886 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3887 {
3888 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3889 ctxt->ops->cpl(ctxt) > 0)
3890 return emulate_gp(ctxt, 0);
3891
3892 if (ctxt->dst.type == OP_MEM)
3893 ctxt->dst.bytes = 2;
3894 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3895 return X86EMUL_CONTINUE;
3896 }
3897
3898 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3899 {
3900 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3901 | (ctxt->src.val & 0x0f));
3902 ctxt->dst.type = OP_NONE;
3903 return X86EMUL_CONTINUE;
3904 }
3905
3906 static int em_loop(struct x86_emulate_ctxt *ctxt)
3907 {
3908 int rc = X86EMUL_CONTINUE;
3909
3910 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
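/*
 * LOOP is 0xe2; LOOPNE (0xe0) and LOOPE (0xe1) additionally test ZF.
 * XOR-ing the opcode with 5 maps them onto the Jcc condition codes NZ (5)
 * and Z (4), so test_cc() can evaluate the extra condition.
 */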
3911 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3912 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3913 rc = jmp_rel(ctxt, ctxt->src.val);
3914
3915 return rc;
3916 }
3917
3918 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3919 {
3920 int rc = X86EMUL_CONTINUE;
3921
3922 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3923 rc = jmp_rel(ctxt, ctxt->src.val);
3924
3925 return rc;
3926 }
3927
3928 static int em_in(struct x86_emulate_ctxt *ctxt)
3929 {
3930 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3931 &ctxt->dst.val))
3932 return X86EMUL_IO_NEEDED;
3933
3934 return X86EMUL_CONTINUE;
3935 }
3936
3937 static int em_out(struct x86_emulate_ctxt *ctxt)
3938 {
3939 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3940 &ctxt->src.val, 1);
3941 /* Disable writeback. */
3942 ctxt->dst.type = OP_NONE;
3943 return X86EMUL_CONTINUE;
3944 }
3945
3946 static int em_cli(struct x86_emulate_ctxt *ctxt)
3947 {
3948 if (emulator_bad_iopl(ctxt))
3949 return emulate_gp(ctxt, 0);
3950
3951 ctxt->eflags &= ~X86_EFLAGS_IF;
3952 return X86EMUL_CONTINUE;
3953 }
3954
3955 static int em_sti(struct x86_emulate_ctxt *ctxt)
3956 {
3957 if (emulator_bad_iopl(ctxt))
3958 return emulate_gp(ctxt, 0);
3959
3960 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3961 ctxt->eflags |= X86_EFLAGS_IF;
3962 return X86EMUL_CONTINUE;
3963 }
3964
3965 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3966 {
3967 u32 eax, ebx, ecx, edx;
3968 u64 msr = 0;
3969
3970 ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3971 if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3972 ctxt->ops->cpl(ctxt)) {
3973 return emulate_gp(ctxt, 0);
3974 }
3975
3976 eax = reg_read(ctxt, VCPU_REGS_RAX);
3977 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3978 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3979 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3980 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3981 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3982 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3983 return X86EMUL_CONTINUE;
3984 }
3985
3986 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3987 {
3988 u32 flags;
3989
3990 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3991 X86_EFLAGS_SF;
3992 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3993
3994 ctxt->eflags &= ~0xffUL;
3995 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3996 return X86EMUL_CONTINUE;
3997 }
3998
3999 static int em_lahf(struct x86_emulate_ctxt *ctxt)
4000 {
4001 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
4002 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
4003 return X86EMUL_CONTINUE;
4004 }
4005
4006 static int em_bswap(struct x86_emulate_ctxt *ctxt)
4007 {
4008 switch (ctxt->op_bytes) {
4009 #ifdef CONFIG_X86_64
4010 case 8:
4011 asm("bswap %0" : "+r"(ctxt->dst.val));
4012 break;
4013 #endif
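/*
 * A 16-bit BSWAP is architecturally undefined; 2- and 4-byte operands both
 * take the 32-bit swap below.
 */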
4014 default:
4015 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
4016 break;
4017 }
4018 return X86EMUL_CONTINUE;
4019 }
4020
4021 static int em_clflush(struct x86_emulate_ctxt *ctxt)
4022 {
4023 /* emulating clflush regardless of cpuid */
4024 return X86EMUL_CONTINUE;
4025 }
4026
4027 static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
4028 {
4029 /* emulating clflushopt regardless of cpuid */
4030 return X86EMUL_CONTINUE;
4031 }
4032
4033 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
4034 {
4035 ctxt->dst.val = (s32) ctxt->src.val;
4036 return X86EMUL_CONTINUE;
4037 }
4038
4039 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
4040 {
4041 if (!ctxt->ops->guest_has_fxsr(ctxt))
4042 return emulate_ud(ctxt);
4043
4044 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
4045 return emulate_nm(ctxt);
4046
4047 /*
4048 * Don't emulate a case that should never be hit, instead of working
4049 * around a lack of fxsave64/fxrstor64 on old compilers.
4050 */
4051 if (ctxt->mode >= X86EMUL_MODE_PROT64)
4052 return X86EMUL_UNHANDLEABLE;
4053
4054 return X86EMUL_CONTINUE;
4055 }
4056
4057 /*
4058 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
4059 * and restore MXCSR.
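* With the standard struct fxregs_state layout this works out to 160 bytes
* for nregs == 0 (legacy x87 area including MXCSR), 288 bytes for XMM0-7 and
* 416 bytes for all sixteen XMM registers.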
4060 */
4061 static size_t __fxstate_size(int nregs)
4062 {
4063 return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
4064 }
4065
4066 static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
4067 {
4068 bool cr4_osfxsr;
4069 if (ctxt->mode == X86EMUL_MODE_PROT64)
4070 return __fxstate_size(16);
4071
4072 cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
4073 return __fxstate_size(cr4_osfxsr ? 8 : 0);
4074 }
4075
4076 /*
4077 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
4078 * 1) 16 bit mode
4079 * 2) 32 bit mode
4080 * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs
4081 * preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
4082 * save and restore
4083 * 3) 64-bit mode with REX.W prefix
4084 * - like (2), but XMM 8-15 are being saved and restored
4085 * 4) 64-bit mode without REX.W prefix
4086 * - like (3), but FIP and FDP are 64 bit
4087 *
4088 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
4089 * desired result. (4) is not emulated.
4090 *
4091 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
4092 * and FPU DS) should match.
4093 */
4094 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
4095 {
4096 struct fxregs_state fx_state;
4097 int rc;
4098
4099 rc = check_fxsr(ctxt);
4100 if (rc != X86EMUL_CONTINUE)
4101 return rc;
4102
4103 kvm_fpu_get();
4104
4105 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
4106
4107 kvm_fpu_put();
4108
4109 if (rc != X86EMUL_CONTINUE)
4110 return rc;
4111
4112 return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
4113 fxstate_size(ctxt));
4114 }
4115
4116 /*
4117 * FXRSTOR might restore XMM registers not provided by the guest. Fill
4118 * in the host registers (via FXSAVE) instead, so they won't be modified.
4119 * (preemption has to stay disabled until FXRSTOR).
4120 *
4121 * noinline keeps this function's large on-stack fxregs_state out of the callers' stack frames.
4122 */
4123 static noinline int fxregs_fixup(struct fxregs_state *fx_state,
4124 const size_t used_size)
4125 {
4126 struct fxregs_state fx_tmp;
4127 int rc;
4128
4129 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
4130 memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
4131 __fxstate_size(16) - used_size);
4132
4133 return rc;
4134 }
4135
4136 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
4137 {
4138 struct fxregs_state fx_state;
4139 int rc;
4140 size_t size;
4141
4142 rc = check_fxsr(ctxt);
4143 if (rc != X86EMUL_CONTINUE)
4144 return rc;
4145
4146 size = fxstate_size(ctxt);
4147 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
4148 if (rc != X86EMUL_CONTINUE)
4149 return rc;
4150
4151 kvm_fpu_get();
4152
4153 if (size < __fxstate_size(16)) {
4154 rc = fxregs_fixup(&fx_state, size);
4155 if (rc != X86EMUL_CONTINUE)
4156 goto out;
4157 }
4158
4159 if (fx_state.mxcsr >> 16) {
4160 rc = emulate_gp(ctxt, 0);
4161 goto out;
4162 }
4163
4164 if (rc == X86EMUL_CONTINUE)
4165 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
4166
4167 out:
4168 kvm_fpu_put();
4169
4170 return rc;
4171 }
4172
4173 static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
4174 {
4175 u32 eax, ecx, edx;
4176
4177 if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE))
4178 return emulate_ud(ctxt);
4179
4180 eax = reg_read(ctxt, VCPU_REGS_RAX);
4181 edx = reg_read(ctxt, VCPU_REGS_RDX);
4182 ecx = reg_read(ctxt, VCPU_REGS_RCX);
4183
4184 if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
4185 return emulate_gp(ctxt, 0);
4186
4187 return X86EMUL_CONTINUE;
4188 }
4189
4190 static bool valid_cr(int nr)
4191 {
4192 switch (nr) {
4193 case 0:
4194 case 2 ... 4:
4195 case 8:
4196 return true;
4197 default:
4198 return false;
4199 }
4200 }
4201
4202 static int check_cr_access(struct x86_emulate_ctxt *ctxt)
4203 {
4204 if (!valid_cr(ctxt->modrm_reg))
4205 return emulate_ud(ctxt);
4206
4207 return X86EMUL_CONTINUE;
4208 }
4209
4210 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
4211 {
4212 unsigned long dr7;
4213
4214 ctxt->ops->get_dr(ctxt, 7, &dr7);
4215
4216 return dr7 & DR7_GD;
4217 }
4218
4219 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
4220 {
4221 int dr = ctxt->modrm_reg;
4222 u64 cr4;
4223
4224 if (dr > 7)
4225 return emulate_ud(ctxt);
4226
4227 cr4 = ctxt->ops->get_cr(ctxt, 4);
4228 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
4229 return emulate_ud(ctxt);
4230
4231 if (check_dr7_gd(ctxt)) {
4232 ulong dr6;
4233
4234 ctxt->ops->get_dr(ctxt, 6, &dr6);
4235 dr6 &= ~DR_TRAP_BITS;
4236 dr6 |= DR6_BD | DR6_ACTIVE_LOW;
4237 ctxt->ops->set_dr(ctxt, 6, dr6);
4238 return emulate_db(ctxt);
4239 }
4240
4241 return X86EMUL_CONTINUE;
4242 }
4243
4244 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
4245 {
4246 u64 new_val = ctxt->src.val64;
4247 int dr = ctxt->modrm_reg;
4248
4249 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
4250 return emulate_gp(ctxt, 0);
4251
4252 return check_dr_read(ctxt);
4253 }
4254
4255 static int check_svme(struct x86_emulate_ctxt *ctxt)
4256 {
4257 u64 efer = 0;
4258
4259 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4260
4261 if (!(efer & EFER_SVME))
4262 return emulate_ud(ctxt);
4263
4264 return X86EMUL_CONTINUE;
4265 }
4266
4267 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
4268 {
4269 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
4270
4271 /* Valid physical address? */
4272 if (rax & 0xffff000000000000ULL)
4273 return emulate_gp(ctxt, 0);
4274
4275 return check_svme(ctxt);
4276 }
4277
4278 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
4279 {
4280 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4281
4282 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
4283 return emulate_gp(ctxt, 0);
4284
4285 return X86EMUL_CONTINUE;
4286 }
4287
4288 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
4289 {
4290 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4291 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
4292
4293 /*
4294 * VMware allows access to these pseudo-PMCs even when read via RDPMC
4295 * in Ring3 when CR4.PCE=0.
4296 */
4297 if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
4298 return X86EMUL_CONTINUE;
4299
4300 /*
4301 * If CR4.PCE is set, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE
4302 * check however is unnecessary because CPL is always 0 outside
4303 * protected mode.
4304 */
4305 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
4306 ctxt->ops->check_pmc(ctxt, rcx))
4307 return emulate_gp(ctxt, 0);
4308
4309 return X86EMUL_CONTINUE;
4310 }
4311
4312 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
4313 {
4314 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
4315 if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4316 return emulate_gp(ctxt, 0);
4317
4318 return X86EMUL_CONTINUE;
4319 }
4320
4321 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
4322 {
4323 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
4324 if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4325 return emulate_gp(ctxt, 0);
4326
4327 return X86EMUL_CONTINUE;
4328 }
4329
4330 #define D(_y) { .flags = (_y) }
4331 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4332 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4333 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4334 #define N D(NotImpl)
4335 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4336 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4337 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4338 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4339 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4340 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4341 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4342 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4343 #define II(_f, _e, _i) \
4344 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4345 #define IIP(_f, _e, _i, _p) \
4346 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4347 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4348 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4349
4350 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4351 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4352 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4353 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4354 #define I2bvIP(_f, _e, _i, _p) \
4355 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4356
4357 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4358 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4359 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
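/*
 * For example, F6ALU(Lock, em_add) expands to six table entries covering the
 * r/m8,r8 / r/m,r / r8,r/m8 / r,r/m / AL,imm8 / rAX,imm forms of an ALU
 * opcode row.
 */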
4360
4361 static const struct opcode group7_rm0[] = {
4362 N,
4363 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4364 N, N, N, N, N, N,
4365 };
4366
4367 static const struct opcode group7_rm1[] = {
4368 DI(SrcNone | Priv, monitor),
4369 DI(SrcNone | Priv, mwait),
4370 N, N, N, N, N, N,
4371 };
4372
4373 static const struct opcode group7_rm2[] = {
4374 N,
4375 II(ImplicitOps | Priv, em_xsetbv, xsetbv),
4376 N, N, N, N, N, N,
4377 };
4378
4379 static const struct opcode group7_rm3[] = {
4380 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4381 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4382 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4383 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4384 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4385 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4386 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4387 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4388 };
4389
4390 static const struct opcode group7_rm7[] = {
4391 N,
4392 DIP(SrcNone, rdtscp, check_rdtsc),
4393 N, N, N, N, N, N,
4394 };
4395
4396 static const struct opcode group1[] = {
4397 F(Lock, em_add),
4398 F(Lock | PageTable, em_or),
4399 F(Lock, em_adc),
4400 F(Lock, em_sbb),
4401 F(Lock | PageTable, em_and),
4402 F(Lock, em_sub),
4403 F(Lock, em_xor),
4404 F(NoWrite, em_cmp),
4405 };
4406
4407 static const struct opcode group1A[] = {
4408 I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4409 };
4410
4411 static const struct opcode group2[] = {
4412 F(DstMem | ModRM, em_rol),
4413 F(DstMem | ModRM, em_ror),
4414 F(DstMem | ModRM, em_rcl),
4415 F(DstMem | ModRM, em_rcr),
4416 F(DstMem | ModRM, em_shl),
4417 F(DstMem | ModRM, em_shr),
4418 F(DstMem | ModRM, em_shl),
4419 F(DstMem | ModRM, em_sar),
4420 };
4421
4422 static const struct opcode group3[] = {
4423 F(DstMem | SrcImm | NoWrite, em_test),
4424 F(DstMem | SrcImm | NoWrite, em_test),
4425 F(DstMem | SrcNone | Lock, em_not),
4426 F(DstMem | SrcNone | Lock, em_neg),
4427 F(DstXacc | Src2Mem, em_mul_ex),
4428 F(DstXacc | Src2Mem, em_imul_ex),
4429 F(DstXacc | Src2Mem, em_div_ex),
4430 F(DstXacc | Src2Mem, em_idiv_ex),
4431 };
4432
4433 static const struct opcode group4[] = {
4434 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4435 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4436 N, N, N, N, N, N,
4437 };
4438
4439 static const struct opcode group5[] = {
4440 F(DstMem | SrcNone | Lock, em_inc),
4441 F(DstMem | SrcNone | Lock, em_dec),
4442 I(SrcMem | NearBranch | IsBranch, em_call_near_abs),
4443 I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
4444 I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
4445 I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
4446 I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
4447 };
4448
4449 static const struct opcode group6[] = {
4450 II(Prot | DstMem, em_sldt, sldt),
4451 II(Prot | DstMem, em_str, str),
4452 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4453 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4454 N, N, N, N,
4455 };
4456
4457 static const struct group_dual group7 = { {
4458 II(Mov | DstMem, em_sgdt, sgdt),
4459 II(Mov | DstMem, em_sidt, sidt),
4460 II(SrcMem | Priv, em_lgdt, lgdt),
4461 II(SrcMem | Priv, em_lidt, lidt),
4462 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4463 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4464 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4465 }, {
4466 EXT(0, group7_rm0),
4467 EXT(0, group7_rm1),
4468 EXT(0, group7_rm2),
4469 EXT(0, group7_rm3),
4470 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4471 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4472 EXT(0, group7_rm7),
4473 } };
4474
4475 static const struct opcode group8[] = {
4476 N, N, N, N,
4477 F(DstMem | SrcImmByte | NoWrite, em_bt),
4478 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4479 F(DstMem | SrcImmByte | Lock, em_btr),
4480 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4481 };
4482
4483 /*
4484 * The "memory" destination is actually always a register, since we come
4485 * from the register case of group9.
4486 */
4487 static const struct gprefix pfx_0f_c7_7 = {
4488 N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4489 };
4490
4491
4492 static const struct group_dual group9 = { {
4493 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4494 }, {
4495 N, N, N, N, N, N, N,
4496 GP(0, &pfx_0f_c7_7),
4497 } };
4498
4499 static const struct opcode group11[] = {
4500 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4501 X7(D(Undefined)),
4502 };
4503
4504 static const struct gprefix pfx_0f_ae_7 = {
4505 I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4506 };
4507
4508 static const struct group_dual group15 = { {
4509 I(ModRM | Aligned16, em_fxsave),
4510 I(ModRM | Aligned16, em_fxrstor),
4511 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4512 }, {
4513 N, N, N, N, N, N, N, N,
4514 } };
4515
4516 static const struct gprefix pfx_0f_6f_0f_7f = {
4517 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4518 };
4519
4520 static const struct instr_dual instr_dual_0f_2b = {
4521 I(0, em_mov), N
4522 };
4523
4524 static const struct gprefix pfx_0f_2b = {
4525 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4526 };
4527
4528 static const struct gprefix pfx_0f_10_0f_11 = {
4529 I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4530 };
4531
4532 static const struct gprefix pfx_0f_28_0f_29 = {
4533 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4534 };
4535
4536 static const struct gprefix pfx_0f_e7 = {
4537 N, I(Sse, em_mov), N, N,
4538 };
4539
4540 static const struct escape escape_d9 = { {
4541 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4542 }, {
4543 /* 0xC0 - 0xC7 */
4544 N, N, N, N, N, N, N, N,
4545 /* 0xC8 - 0xCF */
4546 N, N, N, N, N, N, N, N,
4547 /* 0xD0 - 0xD7 */
4548 N, N, N, N, N, N, N, N,
4549 /* 0xD8 - 0xDF */
4550 N, N, N, N, N, N, N, N,
4551 /* 0xE0 - 0xE7 */
4552 N, N, N, N, N, N, N, N,
4553 /* 0xE8 - 0xEF */
4554 N, N, N, N, N, N, N, N,
4555 /* 0xF0 - 0xF7 */
4556 N, N, N, N, N, N, N, N,
4557 /* 0xF8 - 0xFF */
4558 N, N, N, N, N, N, N, N,
4559 } };
4560
4561 static const struct escape escape_db = { {
4562 N, N, N, N, N, N, N, N,
4563 }, {
4564 /* 0xC0 - 0xC7 */
4565 N, N, N, N, N, N, N, N,
4566 /* 0xC8 - 0xCF */
4567 N, N, N, N, N, N, N, N,
4568 /* 0xD0 - 0xD7 */
4569 N, N, N, N, N, N, N, N,
4570 /* 0xD8 - 0xDF */
4571 N, N, N, N, N, N, N, N,
4572 /* 0xE0 - 0xE7 */
4573 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4574 /* 0xE8 - 0xEF */
4575 N, N, N, N, N, N, N, N,
4576 /* 0xF0 - 0xF7 */
4577 N, N, N, N, N, N, N, N,
4578 /* 0xF8 - 0xFF */
4579 N, N, N, N, N, N, N, N,
4580 } };
4581
4582 static const struct escape escape_dd = { {
4583 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4584 }, {
4585 /* 0xC0 - 0xC7 */
4586 N, N, N, N, N, N, N, N,
4587 /* 0xC8 - 0xCF */
4588 N, N, N, N, N, N, N, N,
4589 /* 0xD0 - 0xD7 */
4590 N, N, N, N, N, N, N, N,
4591 /* 0xD8 - 0xDF */
4592 N, N, N, N, N, N, N, N,
4593 /* 0xE0 - 0xE7 */
4594 N, N, N, N, N, N, N, N,
4595 /* 0xE8 - 0xEF */
4596 N, N, N, N, N, N, N, N,
4597 /* 0xF0 - 0xF7 */
4598 N, N, N, N, N, N, N, N,
4599 /* 0xF8 - 0xFF */
4600 N, N, N, N, N, N, N, N,
4601 } };
4602
4603 static const struct instr_dual instr_dual_0f_c3 = {
4604 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4605 };
4606
4607 static const struct mode_dual mode_dual_63 = {
4608 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4609 };
4610
4611 static const struct instr_dual instr_dual_8d = {
4612 D(DstReg | SrcMem | ModRM | NoAccess), N
4613 };
4614
4615 static const struct opcode opcode_table[256] = {
4616 /* 0x00 - 0x07 */
4617 F6ALU(Lock, em_add),
4618 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4619 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4620 /* 0x08 - 0x0F */
4621 F6ALU(Lock | PageTable, em_or),
4622 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4623 N,
4624 /* 0x10 - 0x17 */
4625 F6ALU(Lock, em_adc),
4626 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4627 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4628 /* 0x18 - 0x1F */
4629 F6ALU(Lock, em_sbb),
4630 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4631 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4632 /* 0x20 - 0x27 */
4633 F6ALU(Lock | PageTable, em_and), N, N,
4634 /* 0x28 - 0x2F */
4635 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4636 /* 0x30 - 0x37 */
4637 F6ALU(Lock, em_xor), N, N,
4638 /* 0x38 - 0x3F */
4639 F6ALU(NoWrite, em_cmp), N, N,
4640 /* 0x40 - 0x4F */
4641 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4642 /* 0x50 - 0x57 */
4643 X8(I(SrcReg | Stack, em_push)),
4644 /* 0x58 - 0x5F */
4645 X8(I(DstReg | Stack, em_pop)),
4646 /* 0x60 - 0x67 */
4647 I(ImplicitOps | Stack | No64, em_pusha),
4648 I(ImplicitOps | Stack | No64, em_popa),
4649 N, MD(ModRM, &mode_dual_63),
4650 N, N, N, N,
4651 /* 0x68 - 0x6F */
4652 I(SrcImm | Mov | Stack, em_push),
4653 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4654 I(SrcImmByte | Mov | Stack, em_push),
4655 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4656 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4657 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4658 /* 0x70 - 0x7F */
4659 X16(D(SrcImmByte | NearBranch | IsBranch)),
4660 /* 0x80 - 0x87 */
4661 G(ByteOp | DstMem | SrcImm, group1),
4662 G(DstMem | SrcImm, group1),
4663 G(ByteOp | DstMem | SrcImm | No64, group1),
4664 G(DstMem | SrcImmByte, group1),
4665 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4666 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4667 /* 0x88 - 0x8F */
4668 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4669 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4670 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4671 ID(0, &instr_dual_8d),
4672 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4673 G(0, group1A),
4674 /* 0x90 - 0x97 */
4675 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4676 /* 0x98 - 0x9F */
4677 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4678 I(SrcImmFAddr | No64 | IsBranch, em_call_far), N,
4679 II(ImplicitOps | Stack, em_pushf, pushf),
4680 II(ImplicitOps | Stack, em_popf, popf),
4681 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4682 /* 0xA0 - 0xA7 */
4683 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4684 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4685 I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4686 F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4687 /* 0xA8 - 0xAF */
4688 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4689 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4690 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4691 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4692 /* 0xB0 - 0xB7 */
4693 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4694 /* 0xB8 - 0xBF */
4695 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4696 /* 0xC0 - 0xC7 */
4697 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4698 I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm),
4699 I(ImplicitOps | NearBranch | IsBranch, em_ret),
4700 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4701 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4702 G(ByteOp, group11), G(0, group11),
4703 /* 0xC8 - 0xCF */
4704 I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
4705 I(Stack | IsBranch, em_leave),
4706 I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
4707 I(ImplicitOps | IsBranch, em_ret_far),
4708 D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
4709 D(ImplicitOps | No64 | IsBranch),
4710 II(ImplicitOps | IsBranch, em_iret, iret),
4711 /* 0xD0 - 0xD7 */
4712 G(Src2One | ByteOp, group2), G(Src2One, group2),
4713 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4714 I(DstAcc | SrcImmUByte | No64, em_aam),
4715 I(DstAcc | SrcImmUByte | No64, em_aad),
4716 F(DstAcc | ByteOp | No64, em_salc),
4717 I(DstAcc | SrcXLat | ByteOp, em_mov),
4718 /* 0xD8 - 0xDF */
4719 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4720 /* 0xE0 - 0xE7 */
4721 X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
4722 I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
4723 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4724 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4725 /* 0xE8 - 0xEF */
4726 I(SrcImm | NearBranch | IsBranch, em_call),
4727 D(SrcImm | ImplicitOps | NearBranch | IsBranch),
4728 I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
4729 D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
4730 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4731 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4732 /* 0xF0 - 0xF7 */
4733 N, DI(ImplicitOps, icebp), N, N,
4734 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4735 G(ByteOp, group3), G(0, group3),
4736 /* 0xF8 - 0xFF */
4737 D(ImplicitOps), D(ImplicitOps),
4738 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4739 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4740 };
4741
4742 static const struct opcode twobyte_table[256] = {
4743 /* 0x00 - 0x0F */
4744 G(0, group6), GD(0, &group7), N, N,
4745 N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
4746 II(ImplicitOps | Priv, em_clts, clts), N,
4747 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4748 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4749 /* 0x10 - 0x1F */
4750 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
4751 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
4752 N, N, N, N, N, N,
4753 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4754 D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4755 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4756 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4757 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4758 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4759 /* 0x20 - 0x2F */
4760 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4761 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4762 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4763 check_cr_access),
4764 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4765 check_dr_write),
4766 N, N, N, N,
4767 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4768 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4769 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4770 N, N, N, N,
4771 /* 0x30 - 0x3F */
4772 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4773 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4774 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4775 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4776 I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
4777 I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
4778 N, N,
4779 N, N, N, N, N, N, N, N,
4780 /* 0x40 - 0x4F */
4781 X16(D(DstReg | SrcMem | ModRM)),
4782 /* 0x50 - 0x5F */
4783 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4784 /* 0x60 - 0x6F */
4785 N, N, N, N,
4786 N, N, N, N,
4787 N, N, N, N,
4788 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4789 /* 0x70 - 0x7F */
4790 N, N, N, N,
4791 N, N, N, N,
4792 N, N, N, N,
4793 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4794 /* 0x80 - 0x8F */
4795 X16(D(SrcImm | NearBranch | IsBranch)),
4796 /* 0x90 - 0x9F */
4797 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4798 /* 0xA0 - 0xA7 */
4799 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4800 II(ImplicitOps, em_cpuid, cpuid),
4801 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4802 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4803 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4804 /* 0xA8 - 0xAF */
4805 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4806 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4807 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4808 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4809 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4810 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4811 /* 0xB0 - 0xB7 */
4812 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4813 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4814 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4815 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4816 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4817 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4818 /* 0xB8 - 0xBF */
4819 N, N,
4820 G(BitOp, group8),
4821 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4822 I(DstReg | SrcMem | ModRM, em_bsf_c),
4823 I(DstReg | SrcMem | ModRM, em_bsr_c),
4824 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4825 /* 0xC0 - 0xC7 */
4826 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4827 N, ID(0, &instr_dual_0f_c3),
4828 N, N, N, GD(0, &group9),
4829 /* 0xC8 - 0xCF */
4830 X8(I(DstReg, em_bswap)),
4831 /* 0xD0 - 0xDF */
4832 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4833 /* 0xE0 - 0xEF */
4834 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4835 N, N, N, N, N, N, N, N,
4836 /* 0xF0 - 0xFF */
4837 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4838 };
4839
4840 static const struct instr_dual instr_dual_0f_38_f0 = {
4841 I(DstReg | SrcMem | Mov, em_movbe), N
4842 };
4843
4844 static const struct instr_dual instr_dual_0f_38_f1 = {
4845 I(DstMem | SrcReg | Mov, em_movbe), N
4846 };
4847
4848 static const struct gprefix three_byte_0f_38_f0 = {
4849 ID(0, &instr_dual_0f_38_f0), N, N, N
4850 };
4851
4852 static const struct gprefix three_byte_0f_38_f1 = {
4853 ID(0, &instr_dual_0f_38_f1), N, N, N
4854 };
4855
4856 /*
4857 * The table below is indexed by the third opcode byte; the selected entry is
4858 * then narrowed down by the SIMD prefix (none/0x66/0xf2/0xf3).
4859 */
4860 static const struct opcode opcode_map_0f_38[256] = {
4861 /* 0x00 - 0x7f */
4862 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4863 /* 0x80 - 0xef */
4864 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4865 /* 0xf0 - 0xf1 */
4866 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4867 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4868 /* 0xf2 - 0xff */
4869 N, N, X4(N), X8(N)
4870 };
4871
4872 #undef D
4873 #undef N
4874 #undef G
4875 #undef GD
4876 #undef I
4877 #undef GP
4878 #undef EXT
4879 #undef MD
4880 #undef ID
4881
4882 #undef D2bv
4883 #undef D2bvIP
4884 #undef I2bv
4885 #undef I2bvIP
4886 #undef I6ALU
4887
4888 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4889 {
4890 unsigned size;
4891
4892 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
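/*
 * Even with a 64-bit operand size most immediates are only 4 bytes and are
 * sign-extended; a full 8-byte immediate is fetched only via OpImm64
 * (e.g. MOV r64, imm64), which doesn't go through imm_size().
 */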
4893 if (size == 8)
4894 size = 4;
4895 return size;
4896 }
4897
4898 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4899 unsigned size, bool sign_extension)
4900 {
4901 int rc = X86EMUL_CONTINUE;
4902
4903 op->type = OP_IMM;
4904 op->bytes = size;
4905 op->addr.mem.ea = ctxt->_eip;
4906 /* NB. Immediates are sign-extended as necessary. */
4907 switch (op->bytes) {
4908 case 1:
4909 op->val = insn_fetch(s8, ctxt);
4910 break;
4911 case 2:
4912 op->val = insn_fetch(s16, ctxt);
4913 break;
4914 case 4:
4915 op->val = insn_fetch(s32, ctxt);
4916 break;
4917 case 8:
4918 op->val = insn_fetch(s64, ctxt);
4919 break;
4920 }
4921 if (!sign_extension) {
4922 switch (op->bytes) {
4923 case 1:
4924 op->val &= 0xff;
4925 break;
4926 case 2:
4927 op->val &= 0xffff;
4928 break;
4929 case 4:
4930 op->val &= 0xffffffff;
4931 break;
4932 }
4933 }
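/* insn_fetch() jumps here with rc already set if the immediate bytes cannot be fetched. */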
4934 done:
4935 return rc;
4936 }
4937
4938 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4939 unsigned d)
4940 {
4941 int rc = X86EMUL_CONTINUE;
4942
4943 switch (d) {
4944 case OpReg:
4945 decode_register_operand(ctxt, op);
4946 break;
4947 case OpImmUByte:
4948 rc = decode_imm(ctxt, op, 1, false);
4949 break;
4950 case OpMem:
4951 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4952 mem_common:
4953 *op = ctxt->memop;
4954 ctxt->memopp = op;
4955 if (ctxt->d & BitOp)
4956 fetch_bit_operand(ctxt);
4957 op->orig_val = op->val;
4958 break;
4959 case OpMem64:
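/* Used for CMPXCHG8B/CMPXCHG16B: with a REX.W prefix the memory operand is 16 bytes. */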
4960 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4961 goto mem_common;
4962 case OpAcc:
4963 op->type = OP_REG;
4964 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4965 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4966 fetch_register_operand(op);
4967 op->orig_val = op->val;
4968 break;
4969 case OpAccLo:
4970 op->type = OP_REG;
4971 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4972 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4973 fetch_register_operand(op);
4974 op->orig_val = op->val;
4975 break;
4976 case OpAccHi:
4977 if (ctxt->d & ByteOp) {
4978 op->type = OP_NONE;
4979 break;
4980 }
4981 op->type = OP_REG;
4982 op->bytes = ctxt->op_bytes;
4983 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4984 fetch_register_operand(op);
4985 op->orig_val = op->val;
4986 break;
4987 case OpDI:
4988 op->type = OP_MEM;
4989 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4990 op->addr.mem.ea =
4991 register_address(ctxt, VCPU_REGS_RDI);
4992 op->addr.mem.seg = VCPU_SREG_ES;
4993 op->val = 0;
4994 op->count = 1;
4995 break;
4996 case OpDX:
4997 op->type = OP_REG;
4998 op->bytes = 2;
4999 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
5000 fetch_register_operand(op);
5001 break;
5002 case OpCL:
5003 op->type = OP_IMM;
5004 op->bytes = 1;
5005 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
5006 break;
5007 case OpImmByte:
5008 rc = decode_imm(ctxt, op, 1, true);
5009 break;
5010 case OpOne:
5011 op->type = OP_IMM;
5012 op->bytes = 1;
5013 op->val = 1;
5014 break;
5015 case OpImm:
5016 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
5017 break;
5018 case OpImm64:
5019 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
5020 break;
5021 case OpMem8:
5022 ctxt->memop.bytes = 1;
5023 if (ctxt->memop.type == OP_REG) {
5024 ctxt->memop.addr.reg = decode_register(ctxt,
5025 ctxt->modrm_rm, true);
5026 fetch_register_operand(&ctxt->memop);
5027 }
5028 goto mem_common;
5029 case OpMem16:
5030 ctxt->memop.bytes = 2;
5031 goto mem_common;
5032 case OpMem32:
5033 ctxt->memop.bytes = 4;
5034 goto mem_common;
5035 case OpImmU16:
5036 rc = decode_imm(ctxt, op, 2, false);
5037 break;
5038 case OpImmU:
5039 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
5040 break;
5041 case OpSI:
5042 op->type = OP_MEM;
5043 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
5044 op->addr.mem.ea =
5045 register_address(ctxt, VCPU_REGS_RSI);
5046 op->addr.mem.seg = ctxt->seg_override;
5047 op->val = 0;
5048 op->count = 1;
5049 break;
5050 case OpXLat:
5051 op->type = OP_MEM;
5052 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
5053 op->addr.mem.ea =
5054 address_mask(ctxt,
5055 reg_read(ctxt, VCPU_REGS_RBX) +
5056 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
5057 op->addr.mem.seg = ctxt->seg_override;
5058 op->val = 0;
5059 break;
5060 case OpImmFAddr:
5061 op->type = OP_IMM;
5062 op->addr.mem.ea = ctxt->_eip;
5063 op->bytes = ctxt->op_bytes + 2;
5064 insn_fetch_arr(op->valptr, op->bytes, ctxt);
5065 break;
5066 case OpMemFAddr:
5067 ctxt->memop.bytes = ctxt->op_bytes + 2;
5068 goto mem_common;
5069 case OpES:
5070 op->type = OP_IMM;
5071 op->val = VCPU_SREG_ES;
5072 break;
5073 case OpCS:
5074 op->type = OP_IMM;
5075 op->val = VCPU_SREG_CS;
5076 break;
5077 case OpSS:
5078 op->type = OP_IMM;
5079 op->val = VCPU_SREG_SS;
5080 break;
5081 case OpDS:
5082 op->type = OP_IMM;
5083 op->val = VCPU_SREG_DS;
5084 break;
5085 case OpFS:
5086 op->type = OP_IMM;
5087 op->val = VCPU_SREG_FS;
5088 break;
5089 case OpGS:
5090 op->type = OP_IMM;
5091 op->val = VCPU_SREG_GS;
5092 break;
5093 case OpImplicit:
5094 /* Special instructions do their own operand decoding. */
5095 default:
5096 op->type = OP_NONE; /* Disable writeback. */
5097 break;
5098 }
5099
5100 done:
5101 return rc;
5102 }
5103
5104 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
5105 {
5106 int rc = X86EMUL_CONTINUE;
5107 int mode = ctxt->mode;
5108 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
5109 bool op_prefix = false;
5110 bool has_seg_override = false;
5111 struct opcode opcode;
5112 u16 dummy;
5113 struct desc_struct desc;
5114
5115 ctxt->memop.type = OP_NONE;
5116 ctxt->memopp = NULL;
5117 ctxt->_eip = ctxt->eip;
5118 ctxt->fetch.ptr = ctxt->fetch.data;
5119 ctxt->fetch.end = ctxt->fetch.data + insn_len;
5120 ctxt->opcode_len = 1;
5121 ctxt->intercept = x86_intercept_none;
5122 if (insn_len > 0)
5123 memcpy(ctxt->fetch.data, insn, insn_len);
5124 else {
5125 rc = __do_insn_fetch_bytes(ctxt, 1);
5126 if (rc != X86EMUL_CONTINUE)
5127 goto done;
5128 }
5129
5130 switch (mode) {
5131 case X86EMUL_MODE_REAL:
5132 case X86EMUL_MODE_VM86:
5133 def_op_bytes = def_ad_bytes = 2;
5134 ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
5135 if (desc.d)
5136 def_op_bytes = def_ad_bytes = 4;
5137 break;
5138 case X86EMUL_MODE_PROT16:
5139 def_op_bytes = def_ad_bytes = 2;
5140 break;
5141 case X86EMUL_MODE_PROT32:
5142 def_op_bytes = def_ad_bytes = 4;
5143 break;
5144 #ifdef CONFIG_X86_64
5145 case X86EMUL_MODE_PROT64:
5146 def_op_bytes = 4;
5147 def_ad_bytes = 8;
5148 break;
5149 #endif
5150 default:
5151 return EMULATION_FAILED;
5152 }
5153
5154 ctxt->op_bytes = def_op_bytes;
5155 ctxt->ad_bytes = def_ad_bytes;
5156
5157 /* Legacy prefixes. */
5158 for (;;) {
5159 switch (ctxt->b = insn_fetch(u8, ctxt)) {
5160 case 0x66: /* operand-size override */
5161 op_prefix = true;
5162 /* switch between 2/4 bytes */
5163 ctxt->op_bytes = def_op_bytes ^ 6;
5164 break;
5165 case 0x67: /* address-size override */
5166 if (mode == X86EMUL_MODE_PROT64)
5167 /* switch between 4/8 bytes */
5168 ctxt->ad_bytes = def_ad_bytes ^ 12;
5169 else
5170 /* switch between 2/4 bytes */
5171 ctxt->ad_bytes = def_ad_bytes ^ 6;
5172 break;
5173 case 0x26: /* ES override */
5174 has_seg_override = true;
5175 ctxt->seg_override = VCPU_SREG_ES;
5176 break;
5177 case 0x2e: /* CS override */
5178 has_seg_override = true;
5179 ctxt->seg_override = VCPU_SREG_CS;
5180 break;
5181 case 0x36: /* SS override */
5182 has_seg_override = true;
5183 ctxt->seg_override = VCPU_SREG_SS;
5184 break;
5185 case 0x3e: /* DS override */
5186 has_seg_override = true;
5187 ctxt->seg_override = VCPU_SREG_DS;
5188 break;
5189 case 0x64: /* FS override */
5190 has_seg_override = true;
5191 ctxt->seg_override = VCPU_SREG_FS;
5192 break;
5193 case 0x65: /* GS override */
5194 has_seg_override = true;
5195 ctxt->seg_override = VCPU_SREG_GS;
5196 break;
5197 case 0x40 ... 0x4f: /* REX */
5198 if (mode != X86EMUL_MODE_PROT64)
5199 goto done_prefixes;
5200 ctxt->rex_prefix = ctxt->b;
5201 continue;
5202 case 0xf0: /* LOCK */
5203 ctxt->lock_prefix = 1;
5204 break;
5205 case 0xf2: /* REPNE/REPNZ */
5206 case 0xf3: /* REP/REPE/REPZ */
5207 ctxt->rep_prefix = ctxt->b;
5208 break;
5209 default:
5210 goto done_prefixes;
5211 }
5212
5213 /* Any legacy prefix after a REX prefix nullifies its effect. */
5214
5215 ctxt->rex_prefix = 0;
5216 }
5217
5218 done_prefixes:
5219
5220 /* REX prefix. */
5221 if (ctxt->rex_prefix & 8)
5222 ctxt->op_bytes = 8; /* REX.W */
5223
5224 /* Opcode byte(s). */
5225 opcode = opcode_table[ctxt->b];
5226 /* Two-byte opcode? */
5227 if (ctxt->b == 0x0f) {
5228 ctxt->opcode_len = 2;
5229 ctxt->b = insn_fetch(u8, ctxt);
5230 opcode = twobyte_table[ctxt->b];
5231
5232 /* 0F_38 opcode map */
5233 if (ctxt->b == 0x38) {
5234 ctxt->opcode_len = 3;
5235 ctxt->b = insn_fetch(u8, ctxt);
5236 opcode = opcode_map_0f_38[ctxt->b];
5237 }
5238 }
5239 ctxt->d = opcode.flags;
5240
5241 if (ctxt->d & ModRM)
5242 ctxt->modrm = insn_fetch(u8, ctxt);
5243
5244 /* vex-prefix instructions are not implemented */
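/*
 * In 16/32-bit modes 0xc4/0xc5 are LES/LDS, which take only a memory ModRM;
 * a register form (mod == 3), or 64-bit mode, means the byte is really a VEX
 * prefix.
 */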
5245 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
5246 (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
5247 ctxt->d = NotImpl;
5248 }
5249
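/*
 * Resolve group/dual/prefix-indexed entries until a leaf opcode is reached;
 * each iteration substitutes the selected sub-opcode and merges its flags
 * into ctxt->d.
 */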
5250 while (ctxt->d & GroupMask) {
5251 switch (ctxt->d & GroupMask) {
5252 case Group:
5253 goffset = (ctxt->modrm >> 3) & 7;
5254 opcode = opcode.u.group[goffset];
5255 break;
5256 case GroupDual:
5257 goffset = (ctxt->modrm >> 3) & 7;
5258 if ((ctxt->modrm >> 6) == 3)
5259 opcode = opcode.u.gdual->mod3[goffset];
5260 else
5261 opcode = opcode.u.gdual->mod012[goffset];
5262 break;
5263 case RMExt:
5264 goffset = ctxt->modrm & 7;
5265 opcode = opcode.u.group[goffset];
5266 break;
5267 case Prefix:
5268 if (ctxt->rep_prefix && op_prefix)
5269 return EMULATION_FAILED;
5270 simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
5271 switch (simd_prefix) {
5272 case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
5273 case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
5274 case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
5275 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
5276 }
5277 break;
5278 case Escape:
5279 if (ctxt->modrm > 0xbf) {
5280 size_t size = ARRAY_SIZE(opcode.u.esc->high);
5281 u32 index = array_index_nospec(
5282 ctxt->modrm - 0xc0, size);
5283
5284 opcode = opcode.u.esc->high[index];
5285 } else {
5286 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
5287 }
5288 break;
5289 case InstrDual:
5290 if ((ctxt->modrm >> 6) == 3)
5291 opcode = opcode.u.idual->mod3;
5292 else
5293 opcode = opcode.u.idual->mod012;
5294 break;
5295 case ModeDual:
5296 if (ctxt->mode == X86EMUL_MODE_PROT64)
5297 opcode = opcode.u.mdual->mode64;
5298 else
5299 opcode = opcode.u.mdual->mode32;
5300 break;
5301 default:
5302 return EMULATION_FAILED;
5303 }
5304
5305 ctxt->d &= ~(u64)GroupMask;
5306 ctxt->d |= opcode.flags;
5307 }
5308
5309 ctxt->is_branch = opcode.flags & IsBranch;
5310
5311 /* Unrecognised? */
5312 if (ctxt->d == 0)
5313 return EMULATION_FAILED;
5314
5315 ctxt->execute = opcode.u.execute;
5316
5317 if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
5318 likely(!(ctxt->d & EmulateOnUD)))
5319 return EMULATION_FAILED;
5320
5321 if (unlikely(ctxt->d &
5322 (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
5323 No16))) {
5324 /*
5325 * These are copied unconditionally here, and checked unconditionally
5326 * in x86_emulate_insn.
5327 */
5328 ctxt->check_perm = opcode.check_perm;
5329 ctxt->intercept = opcode.intercept;
5330
5331 if (ctxt->d & NotImpl)
5332 return EMULATION_FAILED;
5333
5334 if (mode == X86EMUL_MODE_PROT64) {
5335 if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
5336 ctxt->op_bytes = 8;
5337 else if (ctxt->d & NearBranch)
5338 ctxt->op_bytes = 8;
5339 }
5340
5341 if (ctxt->d & Op3264) {
5342 if (mode == X86EMUL_MODE_PROT64)
5343 ctxt->op_bytes = 8;
5344 else
5345 ctxt->op_bytes = 4;
5346 }
5347
5348 if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5349 ctxt->op_bytes = 4;
5350
5351 if (ctxt->d & Sse)
5352 ctxt->op_bytes = 16;
5353 else if (ctxt->d & Mmx)
5354 ctxt->op_bytes = 8;
5355 }
5356
5357 /* ModRM and SIB bytes. */
5358 if (ctxt->d & ModRM) {
5359 rc = decode_modrm(ctxt, &ctxt->memop);
5360 if (!has_seg_override) {
5361 has_seg_override = true;
5362 ctxt->seg_override = ctxt->modrm_seg;
5363 }
5364 } else if (ctxt->d & MemAbs)
5365 rc = decode_abs(ctxt, &ctxt->memop);
5366 if (rc != X86EMUL_CONTINUE)
5367 goto done;
5368
5369 if (!has_seg_override)
5370 ctxt->seg_override = VCPU_SREG_DS;
5371
5372 ctxt->memop.addr.mem.seg = ctxt->seg_override;
5373
5374 /*
5375 * Decode and fetch the source operand: register, memory
5376 * or immediate.
5377 */
5378 rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5379 if (rc != X86EMUL_CONTINUE)
5380 goto done;
5381
5382 /*
5383 * Decode and fetch the second source operand: register, memory
5384 * or immediate.
5385 */
5386 rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5387 if (rc != X86EMUL_CONTINUE)
5388 goto done;
5389
5390 /* Decode and fetch the destination operand: register or memory. */
5391 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5392
5393 if (ctxt->rip_relative && likely(ctxt->memopp))
5394 ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5395 ctxt->memopp->addr.mem.ea + ctxt->_eip);
5396
5397 done:
5398 if (rc == X86EMUL_PROPAGATE_FAULT)
5399 ctxt->have_exception = true;
5400 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5401 }
5402
5403 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5404 {
5405 return ctxt->d & PageTable;
5406 }
5407
5408 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5409 {
5410 /* The second termination condition only applies for REPE
5411 * and REPNE. Test if the repeat string operation prefix is
5412 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
5413 * corresponding termination condition according to:
5414 * - if REPE/REPZ and ZF = 0 then done
5415 * - if REPNE/REPNZ and ZF = 1 then done
5416 */
5417 if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5418 (ctxt->b == 0xae) || (ctxt->b == 0xaf))
5419 && (((ctxt->rep_prefix == REPE_PREFIX) &&
5420 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5421 || ((ctxt->rep_prefix == REPNE_PREFIX) &&
5422 ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5423 return true;
5424
5425 return false;
5426 }
5427
5428 static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5429 {
5430 int rc;
5431
5432 kvm_fpu_get();
5433 rc = asm_safe("fwait");
5434 kvm_fpu_put();
5435
5436 if (unlikely(rc != X86EMUL_CONTINUE))
5437 return emulate_exception(ctxt, MF_VECTOR, 0, false);
5438
5439 return X86EMUL_CONTINUE;
5440 }
5441
fetch_possible_mmx_operand(struct operand * op)5442 static void fetch_possible_mmx_operand(struct operand *op)
5443 {
5444 if (op->type == OP_MM)
5445 kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
5446 }
5447
fastop(struct x86_emulate_ctxt * ctxt,fastop_t fop)5448 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
5449 {
5450 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
5451
5452 if (!(ctxt->d & ByteOp))
5453 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
5454
5455 asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
5456 : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
5457 [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
5458 : "c"(ctxt->src2.val));
5459
5460 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
5461 if (!fop) /* exception is returned in fop variable */
5462 return emulate_de(ctxt);
5463 return X86EMUL_CONTINUE;
5464 }
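
/*
 * Illustrative note (not part of the original source): fastop() dispatches
 * into a table of size-specific stubs laid out FASTOP_SIZE bytes apart, with
 * the byte variant at offset 0.  For a non-byte op the offset is
 * __ffs(dst.bytes) * FASTOP_SIZE, e.g.
 *
 *	dst.bytes == 2  ->  __ffs(2) == 1  ->  fop + 1 * FASTOP_SIZE
 *	dst.bytes == 4  ->  __ffs(4) == 2  ->  fop + 2 * FASTOP_SIZE
 *	dst.bytes == 8  ->  __ffs(8) == 3  ->  fop + 3 * FASTOP_SIZE
 *
 * The guest's arithmetic flags are installed with push/popf around the
 * indirect call and read back with pushf/pop afterwards.
 */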

void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	/* Clear fields that are set conditionally but read without a guard. */
	ctxt->rip_relative = false;
	ctxt->rex_prefix = 0;
	ctxt->lock_prefix = 0;
	ctxt->rep_prefix = 0;
	ctxt->regs_valid = 0;
	ctxt->regs_dirty = 0;

	ctxt->io_read.pos = 0;
	ctxt->io_read.end = 0;
	ctxt->mem_read.end = 0;
}

int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;
	unsigned emul_flags;

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	emul_flags = ctxt->ops->get_hflags(ctxt);
	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
				(ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(&ctxt->src);
			fetch_possible_mmx_operand(&ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(&ctxt->dst);
		}

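		/*
		 * Illustrative note (not part of the original source): when
		 * the vCPU is in guest mode (X86EMUL_GUEST_MASK) and the
		 * instruction is marked Intercept, the hypervisor's
		 * intercepts are consulted at up to three points:
		 * X86_ICPT_PRE_EXCEPT (below, before exception checks),
		 * X86_ICPT_POST_EXCEPT (after permission checks) and
		 * X86_ICPT_POST_MEMACCESS (after memory-operand access).
		 */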
		if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}
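
	/*
	 * Illustrative note (not part of the original source): the reads
	 * below bring the source, second source and (for read-modify-write
	 * ops) destination memory operands into ctxt before execution.  A
	 * Mov destination skips the read, and a page fault on the
	 * destination read of a writable op is reported as a write fault so
	 * the guest sees the access type it actually attempted.
	 */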

	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				    &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B. */
	ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

	if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;

	if (ctxt->execute) {
		if (ctxt->d & Fastop)
			rc = fastop(ctxt, ctxt->fop);
		else
			rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
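	/*
	 * Illustrative note (not part of the original source): 0x98
	 * sign-extends the low half of the accumulator in place, e.g. with
	 * op_bytes == 2 it is CBW (AL -> AX), with 4 it is CWDE (AX -> EAX)
	 * and with 8 it is CDQE (EAX -> RAX, 64-bit mode only).
	 */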
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc: /* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd: /* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce: /* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4: /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5: /* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * Restore dst type in case the decoding will be reused
	 * (happens for string instructions).
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
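
	/*
	 * Illustrative note (not part of the original source): for a REP
	 * string instruction the count register is decremented below by the
	 * number of elements just processed and the termination conditions
	 * are re-checked.  An unfinished string op normally restarts the
	 * emulator directly, but drops back to the guest (leaving RIP on the
	 * instruction) when the PIO read-ahead buffer is drained or after
	 * every 1024 iterations, so a long string op does not run unbounded
	 * inside the emulator.
	 */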

	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;
		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after every 1024 iterations.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since the instruction is
				 * restarted we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}

	ctxt->eip = ctxt->_eip;
	if (ctxt->mode != X86EMUL_MODE_PROT64)
		ctxt->eip = (u32)ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		if (KVM_EMULATOR_BUG_ON(ctxt->exception.vector > 0x1f, ctxt))
			return EMULATION_FAILED;
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
	switch (ctxt->b) {
	case 0x09: /* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08: /* invd */
	case 0x0d: /* GrpP (prefetch) */
	case 0x18: /* Grp16 (prefetch/nop) */
	case 0x1f: /* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x40 ... 0x4f: /* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f: /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7: /* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf: /* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							 (s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}

void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}
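
/*
 * Illustrative note (not part of the original source): callers may cache the
 * guest-physical translation of the faulting access and reuse it for the
 * emulated access, but that shortcut is refused below for REP string
 * instructions and for ops with two memory operands, since those touch more
 * than one linear address (or advance the address every iteration) and a
 * single cached GPA would not cover them.
 */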
bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->rep_prefix && (ctxt->d & String))
		return false;

	if (ctxt->d & TwoMemOp)
		return false;

	return true;
}