1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * xsave/xrstor support.
4  *
5  * Author: Suresh Siddha <suresh.b.siddha@intel.com>
6  */
7 #include <linux/bitops.h>
8 #include <linux/compat.h>
9 #include <linux/cpu.h>
10 #include <linux/mman.h>
11 #include <linux/nospec.h>
12 #include <linux/pkeys.h>
13 #include <linux/seq_file.h>
14 #include <linux/proc_fs.h>
15 #include <linux/vmalloc.h>
16 
17 #include <asm/fpu/api.h>
18 #include <asm/fpu/regset.h>
19 #include <asm/fpu/signal.h>
20 #include <asm/fpu/xcr.h>
21 
22 #include <asm/tlbflush.h>
23 #include <asm/prctl.h>
24 #include <asm/elf.h>
25 
26 #include "context.h"
27 #include "internal.h"
28 #include "legacy.h"
29 #include "xstate.h"
30 
/*
 * Iterate @bit over the bits set in @mask, starting the scan at
 * FIRST_EXTENDED_XFEATURE.
 *
 * @mask must be an lvalue: its address is taken and sizeof(@mask)
 * determines how many bits are scanned.
 *
 * Note: this expands to TWO statements (an assignment followed by the
 * loop header). Do not use it as the sole body of an unbraced 'if',
 * 'else' or loop.
 */
#define for_each_extended_xfeature(bit, mask)				\
	(bit) = FIRST_EXTENDED_XFEATURE;				\
	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
34 
/*
 * Human readable names of the xstate components, indexed by xfeature
 * number.  The final entry doubles as the catch-all used for any feature
 * number beyond the end of the table.
 *
 * Processor Trace is listed for completeness only: Linux saves and
 * restores PT state through other mechanisms, so that xfeature is unused.
 */
static const char *xfeature_names[] = {
	[0]  = "x87 floating point registers",
	[1]  = "SSE registers",
	[2]  = "AVX registers",
	[3]  = "MPX bounds registers",
	[4]  = "MPX CSR",
	[5]  = "AVX-512 opmask",
	[6]  = "AVX-512 Hi256",
	[7]  = "AVX-512 ZMM_Hi256",
	[8]  = "Processor Trace (unused)",
	[9]  = "Protection Keys User registers",
	[10] = "PASID state",
	[11] = "Control-flow User registers",
	[12] = "Control-flow Kernel registers (unused)",
	[13] = "unknown xstate feature",
	[14] = "unknown xstate feature",
	[15] = "unknown xstate feature",
	[16] = "unknown xstate feature",
	[17] = "AMX Tile config",
	[18] = "AMX Tile data",
	[19] = "unknown xstate feature",
};
63 
/*
 * Maps an xfeature number to the X86_FEATURE_* flag that has to be set in
 * the regular CPUID-derived capabilities for the xfeature to be usable.
 *
 * Slots without an explicit initializer are zero.  Since X86_FEATURE_FPU
 * is itself 0, the consumer (fpu__init_system_xstate()) has to special-case
 * XFEATURE_FP when it interprets a zero entry as "no CPUID feature known".
 *
 * Init-only data (__initdata).
 */
static unsigned short xsave_cpuid_features[] __initdata = {
	[XFEATURE_FP]				= X86_FEATURE_FPU,
	[XFEATURE_SSE]				= X86_FEATURE_XMM,
	[XFEATURE_YMM]				= X86_FEATURE_AVX,
	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
};
80 
/*
 * Per-xfeature caches, indexed by xfeature number and filled in by
 * setup_xstate_cache() during boot.
 *
 * Offsets and sizes start out as -1 (all bits set, as the arrays are
 * unsigned) to mark "not enumerated"; the offset of a supervisor feature
 * deliberately stays at -1.
 */
static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
/* Raw ECX value of the feature's CPUID sub-leaf; see XSTATE_FLAG_* below. */
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;

/* Bits tested in xstate_flags[]: */
#define XSTATE_FLAG_SUPERVISOR	BIT(0)	/* see xfeature_is_supervisor() */
#define XSTATE_FLAG_ALIGNED64	BIT(1)	/* see xfeature_is_aligned64() */
89 
90 /*
91  * Return whether the system supports a given xfeature.
92  *
93  * Also return the name of the (most advanced) feature that the caller requested:
94  */
cpu_has_xfeatures(u64 xfeatures_needed,const char ** feature_name)95 int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
96 {
97 	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
98 
99 	if (unlikely(feature_name)) {
100 		long xfeature_idx, max_idx;
101 		u64 xfeatures_print;
102 		/*
103 		 * So we use FLS here to be able to print the most advanced
104 		 * feature that was requested but is missing. So if a driver
105 		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
106 		 * missing AVX feature - this is the most informative message
107 		 * to users:
108 		 */
109 		if (xfeatures_missing)
110 			xfeatures_print = xfeatures_missing;
111 		else
112 			xfeatures_print = xfeatures_needed;
113 
114 		xfeature_idx = fls64(xfeatures_print)-1;
115 		max_idx = ARRAY_SIZE(xfeature_names)-1;
116 		xfeature_idx = min(xfeature_idx, max_idx);
117 
118 		*feature_name = xfeature_names[xfeature_idx];
119 	}
120 
121 	if (xfeatures_missing)
122 		return 0;
123 
124 	return 1;
125 }
126 EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
127 
xfeature_is_aligned64(int xfeature_nr)128 static bool xfeature_is_aligned64(int xfeature_nr)
129 {
130 	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
131 }
132 
xfeature_is_supervisor(int xfeature_nr)133 static bool xfeature_is_supervisor(int xfeature_nr)
134 {
135 	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
136 }
137 
/*
 * Compute the byte offset of @xfeature inside an xsave buffer whose
 * compacted layout is described by @xcomp_bv.
 */
static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
{
	unsigned int pos, nr;

	/* The legacy FP/SSE area sits at the cached, fixed offsets. */
	if (xfeature <= XFEATURE_SSE)
		return xstate_offsets[xfeature];

	/* So does everything else when the standard format is in use. */
	if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED))
		return xstate_offsets[xfeature];

	/*
	 * Compacted format: walk the components present in @xcomp_bv in
	 * ascending order and accumulate their sizes, honouring the 64 byte
	 * alignment some components ask for, until @xfeature is reached.
	 */
	pos = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	for_each_extended_xfeature(nr, xcomp_bv) {
		if (xfeature_is_aligned64(nr))
			pos = ALIGN(pos, 64);
		if (nr == xfeature)
			return pos;
		pos += xstate_sizes[nr];
	}

	return pos;
}
165 
166 /*
167  * Enable the extended processor state save/restore feature.
168  * Called once per CPU onlining.
169  */
fpu__init_cpu_xstate(void)170 void fpu__init_cpu_xstate(void)
171 {
172 	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
173 		return;
174 
175 	cr4_set_bits(X86_CR4_OSXSAVE);
176 
177 	/*
178 	 * Must happen after CR4 setup and before xsetbv() to allow KVM
179 	 * lazy passthrough.  Write independent of the dynamic state static
180 	 * key as that does not work on the boot CPU. This also ensures
181 	 * that any stale state is wiped out from XFD.
182 	 */
183 	if (cpu_feature_enabled(X86_FEATURE_XFD))
184 		wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
185 
186 	/*
187 	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
188 	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}.  Only XSAVE user
189 	 * states can be set here.
190 	 */
191 	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
192 
193 	/*
194 	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
195 	 */
196 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
197 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
198 				     xfeatures_mask_independent());
199 	}
200 }
201 
xfeature_enabled(enum xfeature xfeature)202 static bool xfeature_enabled(enum xfeature xfeature)
203 {
204 	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
205 }
206 
207 /*
208  * Record the offsets and sizes of various xstates contained
209  * in the XSAVE state memory layout.
210  */
setup_xstate_cache(void)211 static void __init setup_xstate_cache(void)
212 {
213 	u32 eax, ebx, ecx, edx, i;
214 	/* start at the beginning of the "extended state" */
215 	unsigned int last_good_offset = offsetof(struct xregs_state,
216 						 extended_state_area);
217 	/*
218 	 * The FP xstates and SSE xstates are legacy states. They are always
219 	 * in the fixed offsets in the xsave area in either compacted form
220 	 * or standard form.
221 	 */
222 	xstate_offsets[XFEATURE_FP]	= 0;
223 	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
224 						   xmm_space);
225 
226 	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
227 	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
228 						       xmm_space);
229 
230 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
231 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
232 
233 		xstate_sizes[i] = eax;
234 		xstate_flags[i] = ecx;
235 
236 		/*
237 		 * If an xfeature is supervisor state, the offset in EBX is
238 		 * invalid, leave it to -1.
239 		 */
240 		if (xfeature_is_supervisor(i))
241 			continue;
242 
243 		xstate_offsets[i] = ebx;
244 
245 		/*
246 		 * In our xstate size checks, we assume that the highest-numbered
247 		 * xstate feature has the highest offset in the buffer.  Ensure
248 		 * it does.
249 		 */
250 		WARN_ONCE(last_good_offset > xstate_offsets[i],
251 			  "x86/fpu: misordered xstate at %d\n", last_good_offset);
252 
253 		last_good_offset = xstate_offsets[i];
254 	}
255 }
256 
print_xstate_feature(u64 xstate_mask)257 static void __init print_xstate_feature(u64 xstate_mask)
258 {
259 	const char *feature_name;
260 
261 	if (cpu_has_xfeatures(xstate_mask, &feature_name))
262 		pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
263 }
264 
265 /*
266  * Print out all the supported xstate features:
267  */
print_xstate_features(void)268 static void __init print_xstate_features(void)
269 {
270 	print_xstate_feature(XFEATURE_MASK_FP);
271 	print_xstate_feature(XFEATURE_MASK_SSE);
272 	print_xstate_feature(XFEATURE_MASK_YMM);
273 	print_xstate_feature(XFEATURE_MASK_BNDREGS);
274 	print_xstate_feature(XFEATURE_MASK_BNDCSR);
275 	print_xstate_feature(XFEATURE_MASK_OPMASK);
276 	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
277 	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
278 	print_xstate_feature(XFEATURE_MASK_PKRU);
279 	print_xstate_feature(XFEATURE_MASK_PASID);
280 	print_xstate_feature(XFEATURE_MASK_CET_USER);
281 	print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
282 	print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
283 }
284 
/*
 * Sanity-check that @nr is an extended xfeature number, i.e. that it lies
 * in [FIRST_EXTENDED_XFEATURE, XFEATURE_MAX).  A WARN_ON() fires for each
 * bound that is violated; execution continues afterwards.
 *
 * This check is important because it is easy to confuse a feature bit
 * mask with a feature number.
 *
 * @nr is evaluated twice, so it must not have side effects.
 */
#define CHECK_XFEATURE(nr) do {				\
	WARN_ON((nr) < FIRST_EXTENDED_XFEATURE);	\
	WARN_ON((nr) >= XFEATURE_MAX);			\
} while (0)
293 
294 /*
295  * Print out xstate component offsets and sizes
296  */
print_xstate_offset_size(void)297 static void __init print_xstate_offset_size(void)
298 {
299 	int i;
300 
301 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
302 		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
303 			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
304 			i, xstate_sizes[i]);
305 	}
306 }
307 
308 /*
309  * This function is called only during boot time when x86 caps are not set
310  * up and alternative can not be used yet.
311  */
os_xrstor_booting(struct xregs_state * xstate)312 static __init void os_xrstor_booting(struct xregs_state *xstate)
313 {
314 	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
315 	u32 lmask = mask;
316 	u32 hmask = mask >> 32;
317 	int err;
318 
319 	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
320 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
321 	else
322 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
323 
324 	/*
325 	 * We should never fault when copying from a kernel buffer, and the FPU
326 	 * state we set at boot time should be valid.
327 	 */
328 	WARN_ON_FPU(err);
329 }
330 
/*
 * Every feature listed here either has an all zeroes init state or gets
 * individual treatment when the init FPU state is set up.
 *
 * The list is spelled out on purpose instead of being derived from
 * XFEATURE_MASK*SUPPORTED: adding a newly supported feature then breaks the
 * build until somebody has actually looked at that feature's init state.
 */
#define XFEATURES_INIT_FPSTATE_HANDLED		\
	(XFEATURE_MASK_FP |			\
	 XFEATURE_MASK_SSE |			\
	 XFEATURE_MASK_YMM |			\
	 XFEATURE_MASK_BNDREGS |		\
	 XFEATURE_MASK_BNDCSR |			\
	 XFEATURE_MASK_OPMASK |			\
	 XFEATURE_MASK_ZMM_Hi256 |		\
	 XFEATURE_MASK_Hi16_ZMM |		\
	 XFEATURE_MASK_PKRU |			\
	 XFEATURE_MASK_PASID |			\
	 XFEATURE_MASK_CET_USER |		\
	 XFEATURE_MASK_XTILE)
351 
352 /*
353  * setup the xstate image representing the init state
354  */
setup_init_fpu_buf(void)355 static void __init setup_init_fpu_buf(void)
356 {
357 	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
358 		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
359 		     XFEATURES_INIT_FPSTATE_HANDLED);
360 
361 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
362 		return;
363 
364 	print_xstate_features();
365 
366 	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
367 
368 	/*
369 	 * Init all the features state with header.xfeatures being 0x0
370 	 */
371 	os_xrstor_booting(&init_fpstate.regs.xsave);
372 
373 	/*
374 	 * All components are now in init state. Read the state back so
375 	 * that init_fpstate contains all non-zero init state. This only
376 	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
377 	 * those use the init optimization which skips writing data for
378 	 * components in init state.
379 	 *
380 	 * XSAVE could be used, but that would require to reshuffle the
381 	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
382 	 * compaction. But doing so is a pointless exercise because most
383 	 * components have an all zeros init state except for the legacy
384 	 * ones (FP and SSE). Those can be saved with FXSAVE into the
385 	 * legacy area. Adding new features requires to ensure that init
386 	 * state is all zeroes or if not to add the necessary handling
387 	 * here.
388 	 */
389 	fxsave(&init_fpstate.regs.fxsave);
390 }
391 
xfeature_size(int xfeature_nr)392 int xfeature_size(int xfeature_nr)
393 {
394 	u32 eax, ebx, ecx, edx;
395 
396 	CHECK_XFEATURE(xfeature_nr);
397 	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
398 	return eax;
399 }
400 
401 /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
validate_user_xstate_header(const struct xstate_header * hdr,struct fpstate * fpstate)402 static int validate_user_xstate_header(const struct xstate_header *hdr,
403 				       struct fpstate *fpstate)
404 {
405 	/* No unknown or supervisor features may be set */
406 	if (hdr->xfeatures & ~fpstate->user_xfeatures)
407 		return -EINVAL;
408 
409 	/* Userspace must use the uncompacted format */
410 	if (hdr->xcomp_bv)
411 		return -EINVAL;
412 
413 	/*
414 	 * If 'reserved' is shrunken to add a new field, make sure to validate
415 	 * that new field here!
416 	 */
417 	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
418 
419 	/* No reserved bits may be set */
420 	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
421 		return -EINVAL;
422 
423 	return 0;
424 }
425 
__xstate_dump_leaves(void)426 static void __init __xstate_dump_leaves(void)
427 {
428 	int i;
429 	u32 eax, ebx, ecx, edx;
430 	static int should_dump = 1;
431 
432 	if (!should_dump)
433 		return;
434 	should_dump = 0;
435 	/*
436 	 * Dump out a few leaves past the ones that we support
437 	 * just in case there are some goodies up there
438 	 */
439 	for (i = 0; i < XFEATURE_MAX + 10; i++) {
440 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
441 		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
442 			XSTATE_CPUID, i, eax, ebx, ecx, edx);
443 	}
444 }
445 
/*
 * WARN_ONCE() wrapper for XSAVE consistency checks: the message is
 * prefixed with "XSAVE consistency problem: " and, whenever WARN_ONCE()
 * evaluates to true, the CPUID leaves are dumped via
 * __xstate_dump_leaves().
 *
 * @x:   condition that indicates a problem
 * @fmt: printf-style format, followed by its arguments
 */
#define XSTATE_WARN_ON(x, fmt, ...) do {					\
	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) {	\
		__xstate_dump_leaves();						\
	}									\
} while (0)
451 
/*
 * Compare the CPU-reported size @sz of xfeature @nr with sizeof(@__struct).
 * A mismatch triggers a WARN_ONCE() naming the feature and a dump of the
 * CPUID leaves.
 *
 * The expression always evaluates to true: a size mismatch is reported but
 * is not turned into a failure for the caller.
 */
#define XCHECK_SZ(sz, nr, __struct) ({					\
	if (WARN_ONCE(sz != sizeof(__struct),				\
	    "[%s]: struct is %zu bytes, cpu state %d bytes\n",		\
	    xfeature_names[nr], sizeof(__struct), sz)) {		\
		__xstate_dump_leaves();					\
	}								\
	true;								\
})
460 
461 
462 /**
463  * check_xtile_data_against_struct - Check tile data state size.
464  *
465  * Calculate the state size by multiplying the single tile size which is
466  * recorded in a C struct, and the number of tiles that the CPU informs.
467  * Compare the provided size with the calculation.
468  *
469  * @size:	The tile data state size
470  *
471  * Returns:	0 on success, -EINVAL on mismatch.
472  */
check_xtile_data_against_struct(int size)473 static int __init check_xtile_data_against_struct(int size)
474 {
475 	u32 max_palid, palid, state_size;
476 	u32 eax, ebx, ecx, edx;
477 	u16 max_tile;
478 
479 	/*
480 	 * Check the maximum palette id:
481 	 *   eax: the highest numbered palette subleaf.
482 	 */
483 	cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
484 
485 	/*
486 	 * Cross-check each tile size and find the maximum number of
487 	 * supported tiles.
488 	 */
489 	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
490 		u16 tile_size, max;
491 
492 		/*
493 		 * Check the tile size info:
494 		 *   eax[31:16]:  bytes per title
495 		 *   ebx[31:16]:  the max names (or max number of tiles)
496 		 */
497 		cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
498 		tile_size = eax >> 16;
499 		max = ebx >> 16;
500 
501 		if (tile_size != sizeof(struct xtile_data)) {
502 			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
503 			       __stringify(XFEATURE_XTILE_DATA),
504 			       sizeof(struct xtile_data), tile_size);
505 			__xstate_dump_leaves();
506 			return -EINVAL;
507 		}
508 
509 		if (max > max_tile)
510 			max_tile = max;
511 	}
512 
513 	state_size = sizeof(struct xtile_data) * max_tile;
514 	if (size != state_size) {
515 		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
516 		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
517 		__xstate_dump_leaves();
518 		return -EINVAL;
519 	}
520 	return 0;
521 }
522 
523 /*
524  * We have a C struct for each 'xstate'.  We need to ensure
525  * that our software representation matches what the CPU
526  * tells us about the state's size.
527  */
check_xstate_against_struct(int nr)528 static bool __init check_xstate_against_struct(int nr)
529 {
530 	/*
531 	 * Ask the CPU for the size of the state.
532 	 */
533 	int sz = xfeature_size(nr);
534 
535 	/*
536 	 * Match each CPU state with the corresponding software
537 	 * structure.
538 	 */
539 	switch (nr) {
540 	case XFEATURE_YMM:	  return XCHECK_SZ(sz, nr, struct ymmh_struct);
541 	case XFEATURE_BNDREGS:	  return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
542 	case XFEATURE_BNDCSR:	  return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
543 	case XFEATURE_OPMASK:	  return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
544 	case XFEATURE_ZMM_Hi256:  return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
545 	case XFEATURE_Hi16_ZMM:	  return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
546 	case XFEATURE_PKRU:	  return XCHECK_SZ(sz, nr, struct pkru_state);
547 	case XFEATURE_PASID:	  return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
548 	case XFEATURE_XTILE_CFG:  return XCHECK_SZ(sz, nr, struct xtile_cfg);
549 	case XFEATURE_CET_USER:	  return XCHECK_SZ(sz, nr, struct cet_user_state);
550 	case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
551 	default:
552 		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
553 		return false;
554 	}
555 
556 	return true;
557 }
558 
/*
 * Calculate the size of an xsave buffer that holds the features in
 * @xfeatures.
 *
 * @xfeatures:	mask of the features the buffer has to hold
 * @compacted:	true to calculate for the compacted format, false for the
 *		standard format
 *
 * Returns: sizeof(struct xregs_state) if no extended feature is set (this
 * includes an empty mask), otherwise the offset of the highest-numbered
 * feature plus its size.
 */
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
	/* Signed on purpose: an empty mask yields -1, not a huge index. */
	int topmost = fls64(xfeatures) - 1;
	unsigned int offset;

	/* Checked before any table lookup so the index is known to be valid. */
	if (topmost <= XFEATURE_SSE)
		return sizeof(struct xregs_state);

	if (compacted)
		offset = xfeature_get_offset(xfeatures, topmost);
	else
		offset = xstate_offsets[topmost];

	return offset + xstate_sizes[topmost];
}
571 
572 /*
573  * This essentially double-checks what the cpu told us about
574  * how large the XSAVE buffer needs to be.  We are recalculating
575  * it to be safe.
576  *
577  * Independent XSAVE features allocate their own buffers and are not
578  * covered by these checks. Only the size of the buffer for task->fpu
579  * is checked here.
580  */
paranoid_xstate_size_valid(unsigned int kernel_size)581 static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
582 {
583 	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
584 	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
585 	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
586 	int i;
587 
588 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
589 		if (!check_xstate_against_struct(i))
590 			return false;
591 		/*
592 		 * Supervisor state components can be managed only by
593 		 * XSAVES.
594 		 */
595 		if (!xsaves && xfeature_is_supervisor(i)) {
596 			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
597 			return false;
598 		}
599 	}
600 	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
601 	XSTATE_WARN_ON(size != kernel_size,
602 		       "size %u != kernel_size %u\n", size, kernel_size);
603 	return size == kernel_size;
604 }
605 
606 /*
607  * Get total size of enabled xstates in XCR0 | IA32_XSS.
608  *
609  * Note the SDM's wording here.  "sub-function 0" only enumerates
610  * the size of the *user* states.  If we use it to size a buffer
611  * that we use 'XSAVES' on, we could potentially overflow the
612  * buffer because 'XSAVES' saves system states too.
613  *
614  * This also takes compaction into account. So this works for
615  * XSAVEC as well.
616  */
get_compacted_size(void)617 static unsigned int __init get_compacted_size(void)
618 {
619 	unsigned int eax, ebx, ecx, edx;
620 	/*
621 	 * - CPUID function 0DH, sub-function 1:
622 	 *    EBX enumerates the size (in bytes) required by
623 	 *    the XSAVES instruction for an XSAVE area
624 	 *    containing all the state components
625 	 *    corresponding to bits currently set in
626 	 *    XCR0 | IA32_XSS.
627 	 *
628 	 * When XSAVES is not available but XSAVEC is (virt), then there
629 	 * are no supervisor states, but XSAVEC still uses compacted
630 	 * format.
631 	 */
632 	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
633 	return ebx;
634 }
635 
636 /*
637  * Get the total size of the enabled xstates without the independent supervisor
638  * features.
639  */
get_xsave_compacted_size(void)640 static unsigned int __init get_xsave_compacted_size(void)
641 {
642 	u64 mask = xfeatures_mask_independent();
643 	unsigned int size;
644 
645 	if (!mask)
646 		return get_compacted_size();
647 
648 	/* Disable independent features. */
649 	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
650 
651 	/*
652 	 * Ask the hardware what size is required of the buffer.
653 	 * This is the size required for the task->fpu buffer.
654 	 */
655 	size = get_compacted_size();
656 
657 	/* Re-enable independent features so XSAVES will work on them again. */
658 	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
659 
660 	return size;
661 }
662 
get_xsave_size_user(void)663 static unsigned int __init get_xsave_size_user(void)
664 {
665 	unsigned int eax, ebx, ecx, edx;
666 	/*
667 	 * - CPUID function 0DH, sub-function 0:
668 	 *    EBX enumerates the size (in bytes) required by
669 	 *    the XSAVE instruction for an XSAVE area
670 	 *    containing all the *user* state components
671 	 *    corresponding to bits currently set in XCR0.
672 	 */
673 	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
674 	return ebx;
675 }
676 
init_xstate_size(void)677 static int __init init_xstate_size(void)
678 {
679 	/* Recompute the context size for enabled features: */
680 	unsigned int user_size, kernel_size, kernel_default_size;
681 	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
682 
683 	/* Uncompacted user space size */
684 	user_size = get_xsave_size_user();
685 
686 	/*
687 	 * XSAVES kernel size includes supervisor states and uses compacted
688 	 * format. XSAVEC uses compacted format, but does not save
689 	 * supervisor states.
690 	 *
691 	 * XSAVE[OPT] do not support supervisor states so kernel and user
692 	 * size is identical.
693 	 */
694 	if (compacted)
695 		kernel_size = get_xsave_compacted_size();
696 	else
697 		kernel_size = user_size;
698 
699 	kernel_default_size =
700 		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
701 
702 	if (!paranoid_xstate_size_valid(kernel_size))
703 		return -EINVAL;
704 
705 	fpu_kernel_cfg.max_size = kernel_size;
706 	fpu_user_cfg.max_size = user_size;
707 
708 	fpu_kernel_cfg.default_size = kernel_default_size;
709 	fpu_user_cfg.default_size =
710 		xstate_calculate_size(fpu_user_cfg.default_features, false);
711 
712 	return 0;
713 }
714 
715 /*
716  * We enabled the XSAVE hardware, but something went wrong and
717  * we can not use it.  Disable it.
718  */
fpu__init_disable_system_xstate(unsigned int legacy_size)719 static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
720 {
721 	fpu_kernel_cfg.max_features = 0;
722 	cr4_clear_bits(X86_CR4_OSXSAVE);
723 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
724 
725 	/* Restore the legacy size.*/
726 	fpu_kernel_cfg.max_size = legacy_size;
727 	fpu_kernel_cfg.default_size = legacy_size;
728 	fpu_user_cfg.max_size = legacy_size;
729 	fpu_user_cfg.default_size = legacy_size;
730 
731 	/*
732 	 * Prevent enabling the static branch which enables writes to the
733 	 * XFD MSR.
734 	 */
735 	init_fpstate.xfd = 0;
736 
737 	fpstate_reset(&current->thread.fpu);
738 }
739 
/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 *
 * @legacy_size: context size to fall back to when XSAVE has to be disabled
 *		 again because one of the consistency checks fails.
 *
 * Rough sequence: enumerate user and supervisor xfeatures via CPUID, filter
 * them down to what the kernel supports, derive the kernel/user max and
 * default feature sets, enable XSAVE on this CPU, cache the layout,
 * compute and verify the buffer sizes and finally set up init_fpstate.
 * Any failure after the enumeration ends up at out_disable.
 */
void __init fpu__init_system_xstate(unsigned int legacy_size)
{
	unsigned int eax, ebx, ecx, edx;
	u64 xfeatures;
	int err;
	int i;

	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_info("x86/fpu: No FPU detected\n");
		return;
	}

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: x87 FPU will use %s\n",
			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
		return;
	}

	/* XSAVE is advertised, but the xstate CPUID leaf is not available. */
	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
		WARN_ON_FPU(1);
		return;
	}

	/*
	 * Find user xstates supported by the processor.
	 */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);

	/*
	 * Find supervisor xstates supported by the processor.
	 */
	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);

	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration.  Disable XSAVE and try to continue
		 * booting without it.  This is too early to BUG().
		 */
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		unsigned short cid = xsave_cpuid_features[i];

		/*
		 * Careful: X86_FEATURE_FPU is 0!  A zero entry therefore
		 * means "no CPUID feature known" for every slot except
		 * XFEATURE_FP.
		 */
		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
	}

	/* Dynamic user features are only kept when XFD is available. */
	if (!cpu_feature_enabled(X86_FEATURE_XFD))
		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Supervisor features are only kept when XSAVES is available. */
	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
	else
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
					XFEATURE_MASK_SUPERVISOR_SUPPORTED;

	/* The user configuration never contains supervisor features. */
	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;

	/* Clean out dynamic features from default */
	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
	fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	fpu_user_cfg.default_features = fpu_user_cfg.max_features;
	fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Store it for paranoia check at the end */
	xfeatures = fpu_kernel_cfg.max_features;

	/*
	 * Initialize the default XFD state in init_fpstate and enable the
	 * dynamic sizing mechanism if dynamic states are available.  The
	 * static key cannot be enabled here because this runs before
	 * jump_label_init(). This is delayed to an initcall.
	 */
	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;

	/* Set up compaction feature bit */
	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
	    cpu_feature_enabled(X86_FEATURE_XSAVES))
		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();

	/* Cache size, offset and flags for initialization */
	setup_xstate_cache();

	err = init_xstate_size();
	if (err)
		goto out_disable;

	/* Reset the state for the current task */
	fpstate_reset(&current->thread.fpu);

	/*
	 * Update info used for ptrace frames; use standard-format size and no
	 * supervisor xstates:
	 */
	update_regset_xstate_info(fpu_user_cfg.max_size,
				  fpu_user_cfg.max_features);

	/*
	 * init_fpstate excludes dynamic states as they are large but init
	 * state is zero.
	 */
	init_fpstate.size		= fpu_kernel_cfg.default_size;
	init_fpstate.xfeatures		= fpu_kernel_cfg.default_features;

	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
			sizeof(init_fpstate.regs), init_fpstate.size);
		goto out_disable;
	}

	setup_init_fpu_buf();

	/*
	 * Paranoia check whether something in the setup modified the
	 * xfeatures mask.
	 */
	if (xfeatures != fpu_kernel_cfg.max_features) {
		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
		       xfeatures, fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * CPU capabilities initialization runs before FPU init. So
	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
	 * functional, set the feature bit so depending code works.
	 */
	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);

	print_xstate_offset_size();
	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		fpu_kernel_cfg.max_features,
		fpu_kernel_cfg.max_size,
		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
	return;

out_disable:
	/* something went wrong, try to boot without any XSAVE support */
	fpu__init_disable_system_xstate(legacy_size);
}
899 
900 /*
901  * Restore minimal FPU state after suspend:
902  */
fpu__resume_cpu(void)903 void fpu__resume_cpu(void)
904 {
905 	/*
906 	 * Restore XCR0 on xsave capable CPUs:
907 	 */
908 	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
909 		xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
910 
911 	/*
912 	 * Restore IA32_XSS. The same CPUID bit enumerates support
913 	 * of XSAVES and MSR_IA32_XSS.
914 	 */
915 	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
916 		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()  |
917 				     xfeatures_mask_independent());
918 	}
919 
920 	if (fpu_state_size_dynamic())
921 		wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
922 }
923 
924 /*
925  * Given an xstate feature nr, calculate where in the xsave
926  * buffer the state is.  Callers should ensure that the buffer
927  * is valid.
928  */
__raw_xsave_addr(struct xregs_state * xsave,int xfeature_nr)929 static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
930 {
931 	u64 xcomp_bv = xsave->header.xcomp_bv;
932 
933 	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
934 		return NULL;
935 
936 	if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
937 		if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
938 			return NULL;
939 	}
940 
941 	return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
942 }
943 
944 /*
945  * Given the xsave area and a state inside, this function returns the
946  * address of the state.
947  *
948  * This is the API that is called to get xstate address in either
949  * standard format or compacted format of xsave area.
950  *
951  * Note that if there is no data for the field in the xsave buffer
952  * this will return NULL.
953  *
954  * Inputs:
955  *	xstate: the thread's storage area for all FPU data
956  *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
957  *	XFEATURE_SSE, etc...)
958  * Output:
959  *	address of the state in the xsave area, or NULL if the
960  *	field is not present in the xsave buffer.
961  */
get_xsave_addr(struct xregs_state * xsave,int xfeature_nr)962 void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
963 {
964 	/*
965 	 * Do we even *have* xsave state?
966 	 */
967 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
968 		return NULL;
969 
970 	/*
971 	 * We should not ever be requesting features that we
972 	 * have not enabled.
973 	 */
974 	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
975 		return NULL;
976 
977 	/*
978 	 * This assumes the last 'xsave*' instruction to
979 	 * have requested that 'xfeature_nr' be saved.
980 	 * If it did not, we might be seeing and old value
981 	 * of the field in the buffer.
982 	 *
983 	 * This can happen because the last 'xsave' did not
984 	 * request that this feature be saved (unlikely)
985 	 * or because the "init optimization" caused it
986 	 * to not be saved.
987 	 */
988 	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
989 		return NULL;
990 
991 	return __raw_xsave_addr(xsave, xfeature_nr);
992 }
993 
994 #ifdef CONFIG_ARCH_HAS_PKEYS
995 
996 /*
997  * This will go out and modify PKRU register to set the access
998  * rights for @pkey to @init_val.
999  */
arch_set_user_pkey_access(struct task_struct * tsk,int pkey,unsigned long init_val)1000 int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
1001 			      unsigned long init_val)
1002 {
1003 	u32 old_pkru, new_pkru_bits = 0;
1004 	int pkey_shift;
1005 
1006 	/*
1007 	 * This check implies XSAVE support.  OSPKE only gets
1008 	 * set if we enable XSAVE and we enable PKU in XCR0.
1009 	 */
1010 	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
1011 		return -EINVAL;
1012 
1013 	/*
1014 	 * This code should only be called with valid 'pkey'
1015 	 * values originating from in-kernel users.  Complain
1016 	 * if a bad value is observed.
1017 	 */
1018 	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
1019 		return -EINVAL;
1020 
1021 	/* Set the bits we need in PKRU:  */
1022 	if (init_val & PKEY_DISABLE_ACCESS)
1023 		new_pkru_bits |= PKRU_AD_BIT;
1024 	if (init_val & PKEY_DISABLE_WRITE)
1025 		new_pkru_bits |= PKRU_WD_BIT;
1026 
1027 	/* Shift the bits in to the correct place in PKRU for pkey: */
1028 	pkey_shift = pkey * PKRU_BITS_PER_PKEY;
1029 	new_pkru_bits <<= pkey_shift;
1030 
1031 	/* Get old PKRU and mask off any old bits in place: */
1032 	old_pkru = read_pkru();
1033 	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
1034 
1035 	/* Write old part along with new part: */
1036 	write_pkru(old_pkru | new_pkru_bits);
1037 
1038 	return 0;
1039 }
#endif /* CONFIG_ARCH_HAS_PKEYS */
1041 
/*
 * Append @size bytes to @to, taken from @xstate when @from_xstate is
 * set and from @init_xstate otherwise.
 */
static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
			 void *init_xstate, unsigned int size)
{
	void *src = init_xstate;

	if (from_xstate)
		src = xstate;

	membuf_write(to, src, size);
}
1047 
/**
 * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @fpstate:	The fpstate buffer from which to copy
 * @xfeatures:	The mask of xfeatures to save (XSAVE mode only)
 * @pkru_val:	The PKRU value to store in the PKRU component
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
 *
 * Whatever space is left in @to after the copy is zero filled.
 */
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
			       u64 xfeatures, u32 pkru_val,
			       enum xstate_copy_mode copy_mode)
{
	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
	struct xregs_state *xinit = &init_fpstate.regs.xsave;
	struct xregs_state *xsave = &fpstate->regs.xsave;
	struct xstate_header header;
	unsigned int zerofrom;
	u64 mask;
	int i;

	/*
	 * Build the header which is handed out: only xfeatures is filled
	 * in, every other header field stays zero.
	 */
	memset(&header, 0, sizeof(header));
	header.xfeatures = xsave->header.xfeatures;

	/* Mask out the feature bits depending on copy mode */
	switch (copy_mode) {
	case XSTATE_COPY_FP:
		header.xfeatures &= XFEATURE_MASK_FP;
		break;

	case XSTATE_COPY_FX:
		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
		break;

	case XSTATE_COPY_XSAVE:
		header.xfeatures &= fpstate->user_xfeatures & xfeatures;
		break;
	}

	/*
	 * The legacy area is emitted piecewise. Each piece comes from
	 * @fpstate when the related feature bit is set in the header and
	 * from init_fpstate otherwise.
	 */

	/* Copy FP state up to MXCSR */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
		     &xinit->i387, off_mxcsr);

	/* Copy MXCSR when SSE or YMM are set in the feature mask */
	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
		     MXCSR_AND_FLAGS_SIZE);

	/* Copy the remaining FP state */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
		     sizeof(xsave->i387.st_space));

	/* Copy the SSE state - shared with YMM, but independently managed */
	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
		     sizeof(xsave->i387.xmm_space));

	/* The FP and FX copy modes end after the legacy area */
	if (copy_mode != XSTATE_COPY_XSAVE)
		goto out;

	/* Zero the padding area */
	membuf_zero(&to, sizeof(xsave->i387.padding));

	/* Copy xsave->i387.sw_reserved */
	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));

	/* Copy the user space relevant state of @xsave->header */
	membuf_write(&to, &header, sizeof(header));

	/* Gap tracking starts right behind the header */
	zerofrom = offsetof(struct xregs_state, extended_state_area);

	/*
	 * This 'mask' indicates which states to copy from fpstate.
	 * Those extended states that are not present in fpstate are
	 * either disabled or initialized:
	 *
	 * In non-compacted format, disabled features still occupy
	 * state space but there is no state to copy from in the
	 * compacted init_fpstate. The gap tracking will zero these
	 * states.
	 *
	 * The extended features have an all zeroes init state. Thus,
	 * remove them from 'mask' to zero those features in the user
	 * buffer instead of retrieving them from init_fpstate.
	 */
	mask = header.xfeatures;

	for_each_extended_xfeature(i, mask) {
		/*
		 * If there was a feature or alignment gap, zero the space
		 * in the destination buffer.
		 */
		if (zerofrom < xstate_offsets[i])
			membuf_zero(&to, xstate_offsets[i] - zerofrom);

		if (i == XFEATURE_PKRU) {
			struct pkru_state pkru = {0};
			/*
			 * PKRU is not necessarily up to date in the
			 * XSAVE buffer. Use the provided value.
			 */
			pkru.pkru = pkru_val;
			membuf_write(&to, &pkru, sizeof(pkru));
		} else {
			membuf_write(&to,
				     __raw_xsave_addr(xsave, i),
				     xstate_sizes[i]);
		}
		/*
		 * Keep track of the last copied state in the non-compacted
		 * target buffer for gap zeroing.
		 */
		zerofrom = xstate_offsets[i] + xstate_sizes[i];
	}

out:
	/* Zero whatever is left of the destination buffer */
	if (to.left)
		membuf_zero(&to, to.left);
}
1173 
/**
 * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @tsk:	The task from which to copy the saved xstate
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
 *
 * Wrapper around __copy_xstate_to_uabi_buf() which takes the fpstate, the
 * feature mask (the fpstate's user_xfeatures) and the PKRU value from @tsk.
 */
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
			     enum xstate_copy_mode copy_mode)
{
	__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
				  tsk->thread.fpu.fpstate->user_xfeatures,
				  tsk->thread.pkru, copy_mode);
}
1193 
/*
 * Copy @size bytes found @offset bytes into the source buffer to @dst.
 * The source is @kbuf when that is non-NULL, otherwise the user space
 * buffer @ubuf.
 *
 * Returns 0 on success, -EFAULT when the copy from user space fails.
 */
static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
			    const void *kbuf, const void __user *ubuf)
{
	if (kbuf) {
		memcpy(dst, kbuf + offset, size);
		return 0;
	}

	return copy_from_user(dst, ubuf + offset, size) ? -EFAULT : 0;
}
1205 
1206 
1207 /**
1208  * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
1209  * @fpstate:	The fpstate buffer to copy to
1210  * @kbuf:	The UABI format buffer, if it comes from the kernel
1211  * @ubuf:	The UABI format buffer, if it comes from userspace
1212  * @pkru:	The location to write the PKRU value to
1213  *
1214  * Converts from the UABI format into the kernel internal hardware
1215  * dependent format.
1216  *
1217  * This function ultimately has three different callers with distinct PKRU
1218  * behavior.
1219  * 1.	When called from sigreturn the PKRU register will be restored from
1220  *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
1221  *	@fpstate is sufficient to cover this case, but the caller will also
1222  *	pass a pointer to the thread_struct's pkru field in @pkru and updating
1223  *	it is harmless.
1224  * 2.	When called from ptrace the PKRU register will be restored from the
1225  *	thread_struct's pkru field. A pointer to that is passed in @pkru.
1226  *	The kernel will restore it manually, so the XRSTOR behavior that resets
1227  *	the PKRU register to the hardware init value (0) if the corresponding
1228  *	xfeatures bit is not set is emulated here.
1229  * 3.	When called from KVM the PKRU register will be restored from the vcpu's
1230  *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
1231  *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
1232  *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
1233  *	bit is not set.
1234  */
copy_uabi_to_xstate(struct fpstate * fpstate,const void * kbuf,const void __user * ubuf,u32 * pkru)1235 static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
1236 			       const void __user *ubuf, u32 *pkru)
1237 {
1238 	struct xregs_state *xsave = &fpstate->regs.xsave;
1239 	unsigned int offset, size;
1240 	struct xstate_header hdr;
1241 	u64 mask;
1242 	int i;
1243 
1244 	offset = offsetof(struct xregs_state, header);
1245 	if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
1246 		return -EFAULT;
1247 
1248 	if (validate_user_xstate_header(&hdr, fpstate))
1249 		return -EINVAL;
1250 
1251 	/* Validate MXCSR when any of the related features is in use */
1252 	mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
1253 	if (hdr.xfeatures & mask) {
1254 		u32 mxcsr[2];
1255 
1256 		offset = offsetof(struct fxregs_state, mxcsr);
1257 		if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
1258 			return -EFAULT;
1259 
1260 		/* Reserved bits in MXCSR must be zero. */
1261 		if (mxcsr[0] & ~mxcsr_feature_mask)
1262 			return -EINVAL;
1263 
1264 		/* SSE and YMM require MXCSR even when FP is not in use. */
1265 		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
1266 			xsave->i387.mxcsr = mxcsr[0];
1267 			xsave->i387.mxcsr_mask = mxcsr[1];
1268 		}
1269 	}
1270 
1271 	for (i = 0; i < XFEATURE_MAX; i++) {
1272 		mask = BIT_ULL(i);
1273 
1274 		if (hdr.xfeatures & mask) {
1275 			void *dst = __raw_xsave_addr(xsave, i);
1276 
1277 			offset = xstate_offsets[i];
1278 			size = xstate_sizes[i];
1279 
1280 			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
1281 				return -EFAULT;
1282 		}
1283 	}
1284 
1285 	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
1286 		struct pkru_state *xpkru;
1287 
1288 		xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
1289 		*pkru = xpkru->pkru;
1290 	} else {
1291 		/*
1292 		 * KVM may pass NULL here to indicate that it does not need
1293 		 * PKRU updated.
1294 		 */
1295 		if (pkru)
1296 			*pkru = 0;
1297 	}
1298 
1299 	/*
1300 	 * The state that came in from userspace was user-state only.
1301 	 * Mask all the user states out of 'xfeatures':
1302 	 */
1303 	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
1304 
1305 	/*
1306 	 * Add back in the features that came in from userspace:
1307 	 */
1308 	xsave->header.xfeatures |= hdr.xfeatures;
1309 
1310 	return 0;
1311 }
1312 
/*
 * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
 * format and copy to the target thread. Used by ptrace and KVM.
 *
 * @fpstate is the destination, @kbuf the UABI format source in kernel
 * memory and @pkru the location which receives the PKRU value.
 * Returns the result of copy_uabi_to_xstate().
 */
int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
	/* Kernel buffer variant: no user space pointer is passed on */
	return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
}
1321 
/*
 * Convert from a sigreturn standard-format user-space buffer to kernel
 * XSAVE[S] format and copy to the target thread. This is called from the
 * sigreturn() and rt_sigreturn() system calls.
 *
 * The destination is @tsk's fpstate, the PKRU value is written to @tsk's
 * thread.pkru. Returns the result of copy_uabi_to_xstate().
 */
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
				      const void __user *ubuf)
{
	/* User buffer variant: no kernel buffer is passed on */
	return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
}
1332 
validate_independent_components(u64 mask)1333 static bool validate_independent_components(u64 mask)
1334 {
1335 	u64 xchk;
1336 
1337 	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
1338 		return false;
1339 
1340 	xchk = ~xfeatures_mask_independent();
1341 
1342 	if (WARN_ON_ONCE(!mask || mask & xchk))
1343 		return false;
1344 
1345 	return true;
1346 }
1347 
1348 /**
1349  * xsaves - Save selected components to a kernel xstate buffer
1350  * @xstate:	Pointer to the buffer
1351  * @mask:	Feature mask to select the components to save
1352  *
1353  * The @xstate buffer must be 64 byte aligned and correctly initialized as
1354  * XSAVES does not write the full xstate header. Before first use the
1355  * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
1356  * can #GP.
1357  *
1358  * The feature mask must be a subset of the independent features.
1359  */
xsaves(struct xregs_state * xstate,u64 mask)1360 void xsaves(struct xregs_state *xstate, u64 mask)
1361 {
1362 	int err;
1363 
1364 	if (!validate_independent_components(mask))
1365 		return;
1366 
1367 	XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
1368 	WARN_ON_ONCE(err);
1369 }
1370 
1371 /**
1372  * xrstors - Restore selected components from a kernel xstate buffer
1373  * @xstate:	Pointer to the buffer
1374  * @mask:	Feature mask to select the components to restore
1375  *
1376  * The @xstate buffer must be 64 byte aligned and correctly initialized
1377  * otherwise XRSTORS from that buffer can #GP.
1378  *
1379  * Proper usage is to restore the state which was saved with
1380  * xsaves() into @xstate.
1381  *
1382  * The feature mask must be a subset of the independent features.
1383  */
xrstors(struct xregs_state * xstate,u64 mask)1384 void xrstors(struct xregs_state *xstate, u64 mask)
1385 {
1386 	int err;
1387 
1388 	if (!validate_independent_components(mask))
1389 		return;
1390 
1391 	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
1392 	WARN_ON_ONCE(err);
1393 }
1394 
1395 #if IS_ENABLED(CONFIG_KVM)
fpstate_clear_xstate_component(struct fpstate * fps,unsigned int xfeature)1396 void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
1397 {
1398 	void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
1399 
1400 	if (addr)
1401 		memset(addr, 0, xstate_sizes[xfeature]);
1402 }
1403 EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
1404 #endif
1405 
1406 #ifdef CONFIG_X86_64
1407 
1408 #ifdef CONFIG_X86_DEBUG_FPU
1409 /*
1410  * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
1411  * can safely operate on the @fpstate buffer.
1412  */
xstate_op_valid(struct fpstate * fpstate,u64 mask,bool rstor)1413 static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
1414 {
1415 	u64 xfd = __this_cpu_read(xfd_state);
1416 
1417 	if (fpstate->xfd == xfd)
1418 		return true;
1419 
1420 	 /*
1421 	  * The XFD MSR does not match fpstate->xfd. That's invalid when
1422 	  * the passed in fpstate is current's fpstate.
1423 	  */
1424 	if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
1425 		return false;
1426 
1427 	/*
1428 	 * XRSTOR(S) from init_fpstate are always correct as it will just
1429 	 * bring all components into init state and not read from the
1430 	 * buffer. XSAVE(S) raises #PF after init.
1431 	 */
1432 	if (fpstate == &init_fpstate)
1433 		return rstor;
1434 
1435 	/*
1436 	 * XSAVE(S): clone(), fpu_swap_kvm_fpu()
1437 	 * XRSTORS(S): fpu_swap_kvm_fpu()
1438 	 */
1439 
1440 	/*
1441 	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
1442 	 * the buffer area for XFD-disabled state components.
1443 	 */
1444 	mask &= ~xfd;
1445 
1446 	/*
1447 	 * Remove features which are valid in fpstate. They
1448 	 * have space allocated in fpstate.
1449 	 */
1450 	mask &= ~fpstate->xfeatures;
1451 
1452 	/*
1453 	 * Any remaining state components in 'mask' might be written
1454 	 * by XSAVE/XRSTOR. Fail validation it found.
1455 	 */
1456 	return !mask;
1457 }
1458 
/*
 * Debug check: warn once when xstate_op_valid() rejects an XSAVE*
 * (@rstor == false) or XRSTOR* (@rstor == true) operation with @mask
 * on @fpstate.
 */
void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
{
	WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
}
1463 #endif /* CONFIG_X86_DEBUG_FPU */
1464 
/*
 * Initcall which enables the __fpu_state_size_dynamic static branch when
 * init_fpstate.xfd is non-zero. Always returns 0. Registered through
 * arch_initcall() right below.
 */
static int __init xfd_update_static_branch(void)
{
	/*
	 * If init_fpstate.xfd has bits set then dynamic features are
	 * available and the dynamic sizing must be enabled.
	 */
	if (init_fpstate.xfd)
		static_branch_enable(&__fpu_state_size_dynamic);
	return 0;
}
arch_initcall(xfd_update_static_branch)
1476 
1477 void fpstate_free(struct fpu *fpu)
1478 {
1479 	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
1480 		vfree(fpu->fpstate);
1481 }
1482 
/**
 * fpstate_realloc - Reallocate struct fpstate for the requested new features
 *
 * @xfeatures:	A bitmap of xstate features which extend the enabled features
 *		of that task
 * @ksize:	The required size for the kernel buffer
 * @usize:	The required size for user space buffers
 * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
 *
 * Allocates a zeroed fpstate, derives its feature masks and XFD value from
 * the fpstate which it replaces and installs it under fpregs_lock(). The
 * replaced fpstate is freed afterwards if it was vmalloc'ed.
 *
 * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
 * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
 * with large states are likely to live longer.
 *
 * Returns: 0 on success, -ENOMEM on allocation error.
 */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
	struct fpu *fpu = &current->thread.fpu;
	struct fpstate *curfps, *newfps = NULL;
	unsigned int fpsize;
	bool in_use;

	/*
	 * Total allocation: the register buffer plus the part of struct
	 * fpstate in front of it, the latter rounded up to 64 bytes.
	 */
	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);

	newfps = vzalloc(fpsize);
	if (!newfps)
		return -ENOMEM;
	newfps->size = ksize;
	newfps->user_size = usize;
	newfps->is_valloc = true;

	/*
	 * When a guest FPU is supplied, use @guest_fpu->fpstate
	 * as reference independent whether it is in use or not.
	 */
	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;

	/* Determine whether @curfps is the active fpstate */
	in_use = fpu->fpstate == curfps;

	/* Carry the guest related attributes over and update the container */
	if (guest_fpu) {
		newfps->is_guest = true;
		newfps->is_confidential = curfps->is_confidential;
		newfps->in_use = curfps->in_use;
		guest_fpu->xfeatures |= xfeatures;
		guest_fpu->uabi_size = usize;
	}

	fpregs_lock();
	/*
	 * If @curfps is in use, ensure that the current state is in the
	 * registers before swapping fpstate as that might invalidate it
	 * due to layout changes.
	 */
	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	/* Add the new features and clear their bits in the XFD value */
	newfps->xfeatures = curfps->xfeatures | xfeatures;
	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
	newfps->xfd = curfps->xfd & ~xfeatures;

	/* Do the final updates within the locked region */
	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);

	if (guest_fpu) {
		guest_fpu->fpstate = newfps;
		/* If curfps is active, update the FPU fpstate pointer */
		if (in_use)
			fpu->fpstate = newfps;
	} else {
		fpu->fpstate = newfps;
	}

	if (in_use)
		xfd_update_state(fpu->fpstate);
	fpregs_unlock();

	/*
	 * Only free valloc'ed state
	 *
	 * NOTE(review): @curfps has been dereferenced unconditionally
	 * above, so the NULL check below looks purely defensive.
	 */
	if (curfps && curfps->is_valloc)
		vfree(curfps);

	return 0;
}
1567 
validate_sigaltstack(unsigned int usize)1568 static int validate_sigaltstack(unsigned int usize)
1569 {
1570 	struct task_struct *thread, *leader = current->group_leader;
1571 	unsigned long framesize = get_sigframe_size();
1572 
1573 	lockdep_assert_held(&current->sighand->siglock);
1574 
1575 	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
1576 	framesize -= fpu_user_cfg.max_size;
1577 	framesize += usize;
1578 	for_each_thread(leader, thread) {
1579 		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
1580 			return -ENOSPC;
1581 	}
1582 	return 0;
1583 }
1584 
/*
 * Add @requested to the permissions of current's thread group and record
 * the resulting kernel and user state sizes. @permitted is the permission
 * mask as it stands, @guest selects the guest instead of the host
 * permission set.
 *
 * Returns 0 on success or the error which validate_sigaltstack() reports
 * for a host request.
 */
static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
	/*
	 * !XSAVES is intentionally not excluded here: context switching
	 * XCR0 optionally is still a possibility, and so is talking the
	 * silicon vendors into extending XFD for the pre AMX states,
	 * especially AVX512.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	struct fpu *fpu = &current->group_leader->thread.fpu;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	u64 mask;
	int ret;

	/* Everything requested is permitted already */
	if ((permitted & requested) == requested)
		return 0;

	/*
	 * Kernel state size of the combined feature set. On the host the
	 * supervisor states have to be accounted for as well.
	 */
	mask = permitted | requested;
	if (!guest)
		mask |= xfeatures_mask_supervisor();
	ksize = xstate_calculate_size(mask, compacted);

	/* User state size: user features only, in non-compacted layout */
	mask &= XFEATURE_MASK_USER_SUPPORTED;
	usize = xstate_calculate_size(mask, false);

	if (guest) {
		perm = &fpu->guest_perm;
	} else {
		ret = validate_sigaltstack(usize);
		if (ret)
			return ret;
		perm = &fpu->perm;
	}

	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
	WRITE_ONCE(perm->__state_perm, mask);
	/* Protected by sighand lock */
	perm->__state_size = ksize;
	perm->__user_state_size = usize;
	return 0;
}
1629 
/*
 * Permissions array to map facilities with more than one component
 *
 * Indexed by the facility number which is handed to
 * xstate_request_perm(). Entries which are left zero make that function
 * return -EOPNOTSUPP.
 */
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};
1636 
/*
 * Process a permission request for facility @idx on behalf of current's
 * thread group. @guest selects the guest permission set.
 *
 * Returns 0 on success, -EINVAL for an out of range @idx, -EOPNOTSUPP
 * for a facility which is unknown or not fully supported, -EBUSY when
 * the guest permissions are locked, or the error returned by
 * __xstate_request_perm().
 */
static int xstate_request_perm(unsigned long idx, bool guest)
{
	u64 permitted, requested;
	int ret;

	if (idx >= XFEATURE_MAX)
		return -EINVAL;

	/*
	 * A facility can consist of more than one xstate component, so
	 * translate the facility number into its feature mask.
	 */
	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
	requested = xstate_prctl_req[idx];

	/* No mask for this facility, or not all of it is supported */
	if (!requested || (fpu_user_cfg.max_features & requested) != requested)
		return -EOPNOTSUPP;

	/* Quick check without taking the lock */
	permitted = xstate_get_group_perm(guest);
	if ((permitted & requested) == requested)
		return 0;

	/* Serialize against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);
	permitted = xstate_get_group_perm(guest);

	/* Guest permissions are locked by the first vCPU allocation. */
	if (!guest || !(permitted & FPU_GUEST_PERM_LOCKED))
		ret = __xstate_request_perm(permitted, requested, guest);
	else
		ret = -EBUSY;
	spin_unlock_irq(&current->sighand->siglock);
	return ret;
}
1674 
/*
 * Enable the dynamic user features flagged in @xfd_err by reallocating
 * the fpstate, provided current's thread group holds the permission for
 * them. A non-NULL @guest_fpu selects the guest permissions and the
 * guest fpstate.
 *
 * Returns 0 on success and when @xfd_err contains no dynamic user
 * feature, -EPERM when the permission is missing and -EFAULT when
 * fpstate_realloc() fails.
 */
int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	struct fpu *fpu;

	if (!xfd_event) {
		/* Only complain about this for host requests */
		if (!guest_fpu)
			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
		return 0;
	}

	/* Serialize against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);

	/* Let it die if any of the requested features is not permitted */
	if (xfd_event & ~xstate_get_group_perm(!!guest_fpu)) {
		spin_unlock_irq(&current->sighand->siglock);
		return -EPERM;
	}

	fpu = &current->group_leader->thread.fpu;
	if (guest_fpu)
		perm = &fpu->guest_perm;
	else
		perm = &fpu->perm;
	ksize = perm->__state_size;
	usize = perm->__user_state_size;

	/*
	 * Permission is granted and the state size is sufficient. It is
	 * safe to drop the lock at this point: even if another task adds
	 * more features, the buffer sizes read above remain valid for the
	 * feature(s) requested right now.
	 */
	spin_unlock_irq(&current->sighand->siglock);

	/* Allocate the new fpstate. There is no way out if that fails. */
	return fpstate_realloc(xfd_event, ksize, usize, guest_fpu) ? -EFAULT : 0;
}
1718 
/*
 * Host variant of __xfd_enable_feature(): passes NULL as guest FPU
 * container and returns that function's result.
 */
int xfd_enable_feature(u64 xfd_err)
{
	return __xfd_enable_feature(xfd_err, NULL);
}
1723 
1724 #else /* CONFIG_X86_64 */
/* Stub for !CONFIG_X86_64 builds: every permission request fails with -EPERM */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
	return -EPERM;
}
1729 #endif  /* !CONFIG_X86_64 */
1730 
/*
 * Exported accessor for the guest permissions: returns
 * xstate_get_group_perm() with the guest flag set.
 */
u64 xstate_get_guest_group_perm(void)
{
	return xstate_get_group_perm(true);
}
EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
1736 
/**
 * fpu_xstate_prctl - xstate permission operations
 * @option:	A subfunction of arch_prctl()
 * @arg2:	option argument
 * Return:	0 if successful; otherwise, an error code
 *
 * Option arguments:
 *
 * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_GUEST_PERM: Pointer to user space u64 to store the info
 * ARCH_REQ_XCOMP_PERM: Facility number requested
 * ARCH_REQ_XCOMP_GUEST_PERM: Facility number requested
 *
 * For facilities which require more than one XSTATE component, the request
 * must be the highest state component number related to that facility,
 * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
 * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
 *
 * Unknown options are rejected with -EINVAL. The two request options
 * return -EOPNOTSUPP when CONFIG_X86_64 is not enabled.
 */
long fpu_xstate_prctl(int option, unsigned long arg2)
{
	/* @arg2 is either a user space pointer or a facility number */
	u64 __user *uptr = (u64 __user *)arg2;
	u64 permitted, supported;
	unsigned long idx = arg2;
	bool guest = false;

	switch (option) {
	case ARCH_GET_XCOMP_SUPP:
		supported = fpu_user_cfg.max_features |	fpu_user_cfg.legacy_features;
		return put_user(supported, uptr);

	case ARCH_GET_XCOMP_PERM:
		/*
		 * Lockless snapshot as it can also change right after the
		 * dropping the lock.
		 */
		permitted = xstate_get_host_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_GET_XCOMP_GUEST_PERM:
		/* Same as above, but for the guest permissions */
		permitted = xstate_get_guest_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_REQ_XCOMP_GUEST_PERM:
		/* Shares the request path below, operating on guest permissions */
		guest = true;
		fallthrough;

	case ARCH_REQ_XCOMP_PERM:
		if (!IS_ENABLED(CONFIG_X86_64))
			return -EOPNOTSUPP;

		return xstate_request_perm(idx, guest);

	default:
		return -EINVAL;
	}
}
1795 
1796 #ifdef CONFIG_PROC_PID_ARCH_STATUS
1797 /*
1798  * Report the amount of time elapsed in millisecond since last AVX512
1799  * use in the task.
1800  */
avx512_status(struct seq_file * m,struct task_struct * task)1801 static void avx512_status(struct seq_file *m, struct task_struct *task)
1802 {
1803 	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
1804 	long delta;
1805 
1806 	if (!timestamp) {
1807 		/*
1808 		 * Report -1 if no AVX512 usage
1809 		 */
1810 		delta = -1;
1811 	} else {
1812 		delta = (long)(jiffies - timestamp);
1813 		/*
1814 		 * Cap to LONG_MAX if time difference > LONG_MAX
1815 		 */
1816 		if (delta < 0)
1817 			delta = LONG_MAX;
1818 		delta = jiffies_to_msecs(delta);
1819 	}
1820 
1821 	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
1822 	seq_putc(m, '\n');
1823 }
1824 
/*
 * Report architecture specific information
 *
 * Writes the AVX512 status of @task to @m when the AVX512F feature is
 * enabled. @ns and @pid are not used here. Always returns 0.
 */
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
			struct pid *pid, struct task_struct *task)
{
	/*
	 * Report AVX512 state if the processor and build option supported.
	 */
	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
		avx512_status(m, task);

	return 0;
}
1839 #endif /* CONFIG_PROC_PID_ARCH_STATUS */
1840