/*
 * X86-64 specific CPU setup.
 * Copyright (C) 1995  Linus Torvalds
 * Copyright 2001, 2002 SuSE Labs / Andi Kleen.
 * See setup.c for older changelog.
 * $Id: setup64.c,v 1.27 2004/02/27 18:30:19 ak Exp $
 */
#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <asm/pda.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/bitops.h>
#include <asm/atomic.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/mman.h>

char x86_boot_params[2048] __initdata = {0,};

static unsigned long cpu_initialized __initdata = 0;

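/*
 * Per-CPU data area.  Each CPU reaches its own PDA through the GS
 * segment base, which pda_init() below points at cpu_pda[cpu].
 */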
struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned;

extern void system_call(void);
extern void ia32_cstar_target(void);

struct desc_ptr gdt_descr = { 0 /* filled in */, (unsigned long) gdt_table };
struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };

/* If you change the default, make sure the path in check_efer() below
   still sets the correct flags everywhere. */
unsigned long __supported_pte_mask = ~0UL;
static int do_not_nx __initdata = 0;
unsigned long vm_stack_flags = __VM_STACK_FLAGS;
unsigned long vm_stack_flags32 = __VM_STACK_FLAGS;
unsigned long vm_data_default_flags = __VM_DATA_DEFAULT_FLAGS;
unsigned long vm_data_default_flags32 = __VM_DATA_DEFAULT_FLAGS;
unsigned long vm_force_exec32 = PROT_EXEC;

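/* Interrupt stack for the boot CPU; secondary CPUs allocate theirs in
   pda_init(). */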
char boot_cpu_stack[IRQSTACKSIZE] __cacheline_aligned;

/* noexec=on|off|noforce

on	Enable non-executable mappings
off	Disable non-executable mappings
noforce (default) Don't enable non-executable mappings by default for
	heap/stack/data, but allow PROT_EXEC to be effective

*/
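/* Example: booting with "noexec=on" makes heap/stack/data mappings
   non-executable on NX-capable CPUs; with "noexec=noforce" they stay
   executable by default, but the NX bit remains available so that
   mappings created without PROT_EXEC are really non-executable. */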

int __init nonx_setup(char *str)
{
	if (!strncmp(str, "on", 3)) {
		__supported_pte_mask |= _PAGE_NX;
		do_not_nx = 0;
		vm_data_default_flags &= ~VM_EXEC;
		vm_stack_flags &= ~VM_EXEC;
	} else if (!strncmp(str, "noforce", 7) || !strncmp(str, "off", 3)) {
		do_not_nx = (str[0] == 'o');
		if (do_not_nx)
			__supported_pte_mask &= ~_PAGE_NX;
		vm_data_default_flags |= VM_EXEC;
		vm_stack_flags |= VM_EXEC;
	}
	return 1;
}

/* noexec32=opt{,opt}

Control the no-exec default for 32-bit processes.  It can also be
overridden per executable using ELF header flags (e.g. needed for the
X server).  Requires noexec=on or noexec=noforce to be effective.

Valid options:
   all,on    Heap, stack and data are non-executable.
   off       (default) Heap, stack and data are executable.
   stack     Stack is non-executable; heap/data are.
   force     Don't imply PROT_EXEC for PROT_READ.
   compat    (default) Imply PROT_EXEC for PROT_READ.

*/
static int __init nonx32_setup(char *str)
{
	char *s;
	while ((s = strsep(&str, ",")) != NULL) {
		if (!strcmp(s, "all") || !strcmp(s, "on")) {
			vm_data_default_flags32 &= ~VM_EXEC;
			vm_stack_flags32 &= ~VM_EXEC;
		} else if (!strcmp(s, "off")) {
			vm_data_default_flags32 |= VM_EXEC;
			vm_stack_flags32 |= VM_EXEC;
		} else if (!strcmp(s, "stack")) {
			vm_data_default_flags32 |= VM_EXEC;
			vm_stack_flags32 &= ~VM_EXEC;
		} else if (!strcmp(s, "force")) {
			vm_force_exec32 = 0;
		} else if (!strcmp(s, "compat")) {
			vm_force_exec32 = PROT_EXEC;
		}
	}
	return 1;
}

__setup("noexec=", nonx_setup);
__setup("noexec32=", nonx32_setup);

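/*
 * Set up this CPU's PDA: give it an interrupt stack, a private copy of
 * the top-level page table, and a GS base pointing at cpu_pda[cpu].
 */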
void pda_init(int cpu)
{
	pml4_t *level4;

	if (cpu == 0) {
		/* others are initialized in smpboot.c */
		cpu_pda[cpu].pcurrent = init_tasks[cpu];
		cpu_pda[cpu].irqstackptr = boot_cpu_stack;
		level4 = init_level4_pgt;
	} else {
		cpu_pda[cpu].irqstackptr = (char *)
			__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
		if (!cpu_pda[cpu].irqstackptr)
			panic("cannot allocate irqstack for cpu %d\n", cpu);
		level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
	}
	if (!level4)
		panic("Cannot allocate top level page for cpu %d", cpu);

	cpu_pda[cpu].level4_pgt = (unsigned long *)level4;
	if (level4 != init_level4_pgt)
		memcpy(level4, &init_level4_pgt, PAGE_SIZE);
	set_pml4(level4 + 510,
		 mk_kernel_pml4(__pa_symbol(boot_vmalloc_pgt), KERNPG_TABLE));
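	/* Switch to this CPU's copy of the page table. */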
	asm volatile("movq %0,%%cr3" :: "r" (__pa(level4)));

	cpu_pda[cpu].irqstackptr += IRQSTACKSIZE-64;
	cpu_pda[cpu].cpunumber = cpu;
	cpu_pda[cpu].irqcount = -1;

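	/* Clear %fs/%gs and point the GS base at this CPU's PDA. */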
	asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
	wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
}

void syscall_init(void)
{
	/*
	 * LSTAR and STAR live in a somewhat strange symbiosis: they both
	 * write to the same internal register.  STAR allows setting CS/DS,
	 * but only a 32-bit target; LSTAR sets the 64-bit rip.
	 */
	wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32);
	wrmsrl(MSR_LSTAR, system_call);

#ifdef CONFIG_IA32_EMULATION
	wrmsrl(MSR_CSTAR, ia32_cstar_target);
#endif
}

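/* Statically allocated exception (IST) stacks for the boot CPU. */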
char boot_exception_stacks[N_EXCEPTION_STACKS*EXCEPTION_STKSZ];

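/* Decide whether the NX bit may be used, based on EFER.NX and the
   earlier noexec= parsing (do_not_nx). */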
void check_efer(void)
{
	unsigned long efer;
	rdmsrl(MSR_EFER, efer);
	if (!(efer & EFER_NX) || do_not_nx) {
		__supported_pte_mask &= ~_PAGE_NX;
	} else {
		__supported_pte_mask |= _PAGE_NX;
	}
}

/*
 * cpu_init() initializes state that is per-CPU.  Some data is already
 * initialized (naturally) in the bootstrap process, such as the GDT
 * and IDT.  We reload them nevertheless; this function acts as a
 * 'CPU state barrier': nothing should get across.
 * A lot of state is already set up in pda_init().
 */
void __init cpu_init (void)
{
#ifdef CONFIG_SMP
	int nr = stack_smp_processor_id();
#else
	int nr = smp_processor_id();
#endif
	struct tss_struct *t = &init_tss[nr];
	unsigned long v;
	unsigned long estack;

	/* CPU 0 is initialised in head64.c */
	if (nr != 0)
		pda_init(nr);

	if (test_and_set_bit(nr, &cpu_initialized))
		panic("CPU#%d already initialized!\n", nr);

	printk("Initializing CPU#%d\n", nr);

	clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

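	/* The GDT is a fixed head followed by one block per possible CPU. */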
	gdt_descr.size = NR_CPUS * sizeof(struct per_cpu_gdt) + __GDT_HEAD_SIZE;

	asm volatile("lgdt %0" :: "m" (gdt_descr));
	asm volatile("lidt %0" :: "m" (idt_descr));

	/*
	 * Clear the NT flag in EFLAGS
	 */

	asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax");

	syscall_init();

	check_efer();

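	/* Mark the I/O permission bitmap as unused; a bitmap of all ones
	   denies every port. */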
	t->io_map_base = INVALID_IO_BITMAP_OFFSET;
	memset(t->io_bitmap, 0xff, sizeof(t->io_bitmap));

	/* Flags to clear on syscall */
	wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE);

	wrmsrl(MSR_FS_BASE, 0);
	wrmsrl(MSR_KERNEL_GS_BASE, 0);
	barrier();

	/*
	 * set up and load the per-CPU TSS
	 */
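	/* The boot CPU uses the static exception stacks; secondary CPUs
	   allocate theirs here.  Each IST slot points at the top of its
	   stack. */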
	estack = (unsigned long)boot_exception_stacks + EXCEPTION_STKSZ;
	for (v = 0; v < N_EXCEPTION_STACKS; v++) {
		if (nr == 0) {
			t->ist[v] = estack;
			estack += EXCEPTION_STKSZ;
		} else {
			estack = __get_free_pages(GFP_ATOMIC, EXCEPTION_STK_ORDER);
			if (!estack)
				panic("Can't allocate exception stack %lu for CPU %d\n", v, nr);
			t->ist[v] = estack + EXCEPTION_STKSZ;
		}
	}

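	/* The idle thread borrows init_mm as its active_mm. */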
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;
	if (current->mm)
		BUG();
	enter_lazy_tlb(&init_mm, current, nr);

	set_tss_desc(nr, t);
	load_TR(nr);
	load_LDT(&init_mm);

	/*
	 * Clear all 6 debug registers:
	 */

	set_debug(0UL, 0);
	set_debug(0UL, 1);
	set_debug(0UL, 2);
	set_debug(0UL, 3);
	set_debug(0UL, 6);
	set_debug(0UL, 7);

	/*
	 * Force FPU initialization:
	 */
	current->flags &= ~PF_USEDFPU;
	current->used_math = 0;
	stts();
}