/*
 * X86-64 specific CPU setup.
 * Copyright (C) 1995 Linus Torvalds
 * Copyright 2001, 2002 SuSE Labs / Andi Kleen.
 * See setup.c for older changelog.
 * $Id: setup64.c,v 1.27 2004/02/27 18:30:19 ak Exp $
 */
#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <asm/pda.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/bitops.h>
#include <asm/atomic.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/mman.h>

char x86_boot_params[2048] __initdata = {0,};

static unsigned long cpu_initialized __initdata = 0;

struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned;

extern void system_call(void);
extern void ia32_cstar_target(void);

struct desc_ptr gdt_descr = { 0 /* filled in */, (unsigned long) gdt_table };
struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };

/* When you change the default, make sure the no-EFER path below still sets
   the correct flags everywhere. */
unsigned long __supported_pte_mask = ~0UL;
static int do_not_nx __initdata = 0;
unsigned long vm_stack_flags = __VM_STACK_FLAGS;
unsigned long vm_stack_flags32 = __VM_STACK_FLAGS;
unsigned long vm_data_default_flags = __VM_DATA_DEFAULT_FLAGS;
unsigned long vm_data_default_flags32 = __VM_DATA_DEFAULT_FLAGS;
unsigned long vm_force_exec32 = PROT_EXEC;

char boot_cpu_stack[IRQSTACKSIZE] __cacheline_aligned;

/* noexec=on|off|noforce

   on       Enable
   off      Disable
   noforce  (default) Don't enable by default for heap/stack/data,
            but allow PROT_EXEC to be effective
*/

int __init nonx_setup(char *str)
{
        if (!strncmp(str, "on", 3)) {
                __supported_pte_mask |= _PAGE_NX;
                do_not_nx = 0;
                vm_data_default_flags &= ~VM_EXEC;
                vm_stack_flags &= ~VM_EXEC;
        } else if (!strncmp(str, "noforce", 7) || !strncmp(str, "off", 3)) {
                /* only "off" starts with 'o'; "noforce" keeps NX usable in ptes */
                do_not_nx = (str[0] == 'o');
                if (do_not_nx)
                        __supported_pte_mask &= ~_PAGE_NX;
                vm_data_default_flags |= VM_EXEC;
                vm_stack_flags |= VM_EXEC;
        }
        return 1;
}

/* noexec32=opt{,opt}

   Control the non-executable default for 32bit processes.  Can also be
   overridden per executable using ELF header flags (e.g. needed for the
   X server).  Requires noexec=on or noexec=noforce to be effective.

   Valid options:
      all,on   Heap, stack and data are non-executable.
      off      (default) Heap, stack and data are executable.
      stack    Stack is non-executable; heap and data are executable.
      force    Don't imply PROT_EXEC for PROT_READ.
      compat   (default) Imply PROT_EXEC for PROT_READ.
*/
static int __init nonx32_setup(char *str)
{
        char *s;
        while ((s = strsep(&str, ",")) != NULL) {
                if (!strcmp(s, "all") || !strcmp(s, "on")) {
                        vm_data_default_flags32 &= ~VM_EXEC;
                        vm_stack_flags32 &= ~VM_EXEC;
                } else if (!strcmp(s, "off")) {
                        vm_data_default_flags32 |= VM_EXEC;
                        vm_stack_flags32 |= VM_EXEC;
                } else if (!strcmp(s, "stack")) {
                        vm_data_default_flags32 |= VM_EXEC;
                        vm_stack_flags32 &= ~VM_EXEC;
                } else if (!strcmp(s, "force")) {
                        vm_force_exec32 = 0;
                } else if (!strcmp(s, "compat")) {
                        vm_force_exec32 = PROT_EXEC;
                }
        }
        return 1;
}

__setup("noexec=", nonx_setup);
__setup("noexec32=", nonx32_setup);
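
/*
 * Illustrative only: with the two handlers registered above, a boot
 * command line such as
 *
 *      noexec=on noexec32=stack
 *
 * makes heap/stack/data non-executable for 64bit processes (provided the
 * CPU supports NX; see check_efer() below) and, for 32bit processes,
 * marks only the stack non-executable.
 */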

void pda_init(int cpu)
{
        pml4_t *level4;

        if (cpu == 0) {
                /* others are initialized in smpboot.c */
                cpu_pda[cpu].pcurrent = init_tasks[cpu];
                cpu_pda[cpu].irqstackptr = boot_cpu_stack;
                level4 = init_level4_pgt;
        } else {
                cpu_pda[cpu].irqstackptr = (char *)
                        __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
                if (!cpu_pda[cpu].irqstackptr)
                        panic("cannot allocate irqstack for cpu %d\n", cpu);
                level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
        }
        if (!level4)
                panic("Cannot allocate top level page for cpu %d", cpu);

        cpu_pda[cpu].level4_pgt = (unsigned long *)level4;
        if (level4 != init_level4_pgt)
                memcpy(level4, &init_level4_pgt, PAGE_SIZE);
        set_pml4(level4 + 510,
                 mk_kernel_pml4(__pa_symbol(boot_vmalloc_pgt), KERNPG_TABLE));
        asm volatile("movq %0,%%cr3" :: "r" (__pa(level4)));

        cpu_pda[cpu].irqstackptr += IRQSTACKSIZE-64;
        cpu_pda[cpu].cpunumber = cpu;
        cpu_pda[cpu].irqcount = -1;

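        /*
         * Clear %fs/%gs and point the GS base MSR at this CPU's PDA, so
         * per-CPU data can be reached relative to %gs from here on.
         */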
        asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
        wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
}

void syscall_init(void)
{
        /*
         * LSTAR and STAR live in a bit strange symbiosis.
         * They both write to the same internal register.  STAR allows
         * setting CS/SS, but only a 32bit target; LSTAR sets the 64bit rip.
         */
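        /*
         * Layout written below, as defined for AMD64 SYSCALL/SYSRET:
         *   STAR[47:32] = __KERNEL_CS  (selector base used by SYSCALL)
         *   STAR[63:48] = __USER32_CS  (selector base used by SYSRET)
         *   LSTAR       = 64bit syscall entry point (system_call)
         */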
        wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
        wrmsrl(MSR_LSTAR, system_call);

#ifdef CONFIG_IA32_EMULATION
        wrmsrl(MSR_CSTAR, ia32_cstar_target);
#endif
}

char boot_exception_stacks[N_EXCEPTION_STACKS*EXCEPTION_STKSZ];

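/*
 * Keep _PAGE_NX in the supported PTE bits only if the CPU has NX enabled
 * in EFER and noexec=off was not given; otherwise mask it out so the page
 * table code never sets it.
 */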
void check_efer(void)
{
        unsigned long efer;
        rdmsrl(MSR_EFER, efer);
        if (!(efer & EFER_NX) || do_not_nx) {
                __supported_pte_mask &= ~_PAGE_NX;
        } else {
                __supported_pte_mask |= _PAGE_NX;
        }
}

/*
 * cpu_init() initializes state that is per-CPU.  Some data is already
 * initialized (naturally) in the bootstrap process, such as the GDT
 * and IDT.  We reload them nevertheless; this function acts as a
 * 'CPU state barrier' that nothing should get across.
 * A lot of state is already set up in PDA init.
 */
void __init cpu_init (void)
{
#ifdef CONFIG_SMP
        int nr = stack_smp_processor_id();
#else
        int nr = smp_processor_id();
#endif
        struct tss_struct * t = &init_tss[nr];
        unsigned long v;
        unsigned long estack;

        /* CPU 0 is initialised in head64.c */
        if (nr != 0)
                pda_init(nr);

        if (test_and_set_bit(nr, &cpu_initialized))
                panic("CPU#%d already initialized!\n", nr);

        printk("Initializing CPU#%d\n", nr);

        clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

        gdt_descr.size = NR_CPUS * sizeof(struct per_cpu_gdt) + __GDT_HEAD_SIZE;

        __asm__ __volatile__("lgdt %0": "=m" (gdt_descr));
        __asm__ __volatile__("lidt %0": "=m" (idt_descr));

        /*
         * Delete NT
         */

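        /* RFLAGS bit 14 is NT (nested task); the btr below clears it. */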
        asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax");

        syscall_init();

        check_efer();

        t->io_map_base = INVALID_IO_BITMAP_OFFSET;
        memset(t->io_bitmap, 0xff, sizeof(t->io_bitmap));

        /* Flags to clear on syscall */
        wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE);

        wrmsrl(MSR_FS_BASE, 0);
        wrmsrl(MSR_KERNEL_GS_BASE, 0);
        barrier();

        /*
         * set up and load the per-CPU TSS
         */
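        /*
         * Each t->ist[] slot filled in below becomes an Interrupt Stack
         * Table entry in this CPU's TSS, giving critical exceptions a
         * known-good stack to switch to.
         */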
        estack = (unsigned long)boot_exception_stacks + EXCEPTION_STKSZ;
        for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                if (nr == 0) {
                        t->ist[v] = estack;
                        estack += EXCEPTION_STKSZ;
                } else {
                        estack = __get_free_pages(GFP_ATOMIC, EXCEPTION_STK_ORDER);
                        if (!estack)
                                panic("Can't allocate exception stack %lu for CPU %d\n", v, nr);
                        t->ist[v] = estack + EXCEPTION_STKSZ;
                }
        }

        atomic_inc(&init_mm.mm_count);
        current->active_mm = &init_mm;
        if (current->mm)
                BUG();
        enter_lazy_tlb(&init_mm, current, nr);

        set_tss_desc(nr, t);
        load_TR(nr);
        load_LDT(&init_mm);

        /*
         * Clear all 6 debug registers:
         */

        set_debug(0UL, 0);
        set_debug(0UL, 1);
        set_debug(0UL, 2);
        set_debug(0UL, 3);
        set_debug(0UL, 6);
        set_debug(0UL, 7);

        /*
         * Force FPU initialization:
         */
        current->flags &= ~PF_USEDFPU;
        current->used_math = 0;
        stts();
}