1 /*
2 * linux/arch/x86_64/mm/init.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2002 Andi Kleen <ak@suse.de>
7 */
8
9 #include <linux/config.h>
10 #include <linux/signal.h>
11 #include <linux/sched.h>
12 #include <linux/kernel.h>
13 #include <linux/errno.h>
14 #include <linux/string.h>
15 #include <linux/types.h>
16 #include <linux/ptrace.h>
17 #include <linux/mman.h>
18 #include <linux/mm.h>
19 #include <linux/swap.h>
20 #include <linux/smp.h>
21 #include <linux/init.h>
22 #include <linux/blk.h>
23 #include <linux/pagemap.h>
24 #include <linux/bootmem.h>
25
26 #include <asm/processor.h>
27 #include <asm/system.h>
28 #include <asm/uaccess.h>
29 #include <asm/pgtable.h>
30 #include <asm/pgalloc.h>
31 #include <asm/dma.h>
32 #include <asm/fixmap.h>
33 #include <asm/e820.h>
34 #include <asm/apic.h>
35 #include <asm/tlb.h>
36 #include <asm/pda.h>
37 #include <asm/mmu_context.h>
38 #include <asm/proto.h>
39
40 mmu_gather_t mmu_gathers[NR_CPUS];
41
42 static unsigned long totalram_pages;
43
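/*
 * Trim the per-CPU page table quicklists (pgd/pmd/pte, accounted in the
 * PDA) back down to 'low' entries once they have grown beyond 'high'.
 * Returns the number of pages freed.
 */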
int do_check_pgt_cache(int low, int high)
{
	int freed = 0;
	if (read_pda(pgtable_cache_sz) > high) {
		do {
			if (read_pda(pgd_quick)) {
				pgd_free_slow(pgd_alloc_one_fast());
				freed++;
			}
			if (read_pda(pmd_quick)) {
				pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
				freed++;
			}
			if (read_pda(pte_quick)) {
				pte_free_slow(pte_alloc_one_fast(NULL, 0));
				freed++;
			}
		} while (read_pda(pgtable_cache_sz) > low);
	}
	return freed;
}

#ifndef CONFIG_DISCONTIGMEM
/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical memory, so we can cache the location of the first one and
 * move around without checking the pgd every time.
 */

void show_mem(void)
{
	int i, total = 0, reserved = 0;
	int shared = 0, cached = 0;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap: %6dkB\n", nr_swap_pages << (PAGE_SHIFT-10));
	i = max_mapnr;
	while (i-- > 0) {
		total++;
		if (PageReserved(mem_map+i))
			reserved++;
		else if (PageSwapCache(mem_map+i))
			cached++;
		else if (page_count(mem_map+i))
			shared += page_count(mem_map+i) - 1;
	}
	printk("%d pages of RAM\n", total);
	printk("%d reserved pages\n", reserved);
	printk("%d pages shared\n", shared);
	printk("%d pages swap cached\n", cached);
	printk("%ld pages in page table cache\n", read_pda(pgtable_cache_sz));
	show_buffers();
}
#endif

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;

int after_bootmem;

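/*
 * Allocate a page for building kernel page tables: from the bootmem
 * allocator while it is still available, and from the normal page
 * allocator once after_bootmem has been set in mem_init().
 */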
static void *spp_getpage(void)
{
	void *ptr;
	if (after_bootmem)
		ptr = (void *) get_free_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_low_pages(PAGE_SIZE);
	if (!ptr)
		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem ? "after bootmem" : "");
	return ptr;
}

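/*
 * Install a single kernel mapping of 'phys' at 'vaddr' with protection
 * 'prot', allocating any missing intermediate page table levels.  Used
 * for the fixmap entries; the PML4 entry itself must already have been
 * set up (in head.S).
 */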
static void set_pte_phys(unsigned long vaddr,
			 unsigned long phys, pgprot_t prot)
{
	pml4_t *level4;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	level4 = pml4_offset_k(vaddr);
	if (pml4_none(*level4)) {
		printk("PML4 FIXMAP MISSING, it should be set up in head.S!\n");
		return;
	}
	pgd = level3_offset_k(level4, vaddr);
	if (pgd_none(*pgd)) {
		pmd = (pmd_t *) spp_getpage();
		set_pgd(pgd, __pgd(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		if (pmd != pmd_offset(pgd, 0)) {
			printk("PAGETABLE BUG #01!\n");
			return;
		}
	}
	pmd = pmd_offset(pgd, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
		if (pte != pte_offset(pmd, 0)) {
			printk("PAGETABLE BUG #02!\n");
			return;
		}
	}
	pte = pte_offset(pmd, vaddr);
	set_pte(pte, mk_pte_phys(phys, prot));

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, prot);
}

extern pmd_t temp_boot_pmds[];

unsigned long __initdata table_start, table_end;

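/*
 * Temporary 2MB windows, backed by the boot-time PMDs (temp_boot_pmds),
 * used by alloc_low_page() to access newly allocated page table pages
 * before the direct mapping of physical memory has been built.
 */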
static struct temp_map {
	pmd_t *pmd;
	void *address;
	int allocated;
} temp_mappings[] __initdata = {
	{ &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
	{ &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
	{}
};

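/*
 * Hand out the next physical page reserved for the early page tables
 * (table_end++) and map it through a free temp_mappings window so it can
 * be written before the direct mapping exists.  Returns the virtual
 * address of the window; *index identifies the window for
 * unmap_low_page() and *phys is the physical address of the page.
 */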
static __init void *alloc_low_page(int *index, unsigned long *phys)
{
	struct temp_map *ti;
	int i;
	unsigned long pfn = table_end++, paddr;
	void *adr;

	if (table_end >= end_pfn_map)
		panic("alloc_low_page: ran out of page mappings");
	for (i = 0; temp_mappings[i].allocated; i++) {
		if (!temp_mappings[i].pmd)
			panic("alloc_low_page: ran out of temp mappings");
	}
	ti = &temp_mappings[i];
	paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
	set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
	ti->allocated = 1;
	__flush_tlb();
	adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
	*index = i;
	*phys = pfn * PAGE_SIZE;
	return adr;
}

static __init void unmap_low_page(int i)
{
	struct temp_map *ti = &temp_mappings[i];
	set_pmd(ti->pmd, __pmd(0));
	ti->allocated = 0;
}

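/*
 * Build the kernel direct mapping for the physical range [address, end)
 * underneath one PML4 entry: fill the given PGD page with PMDs and map
 * everything with 2MB (PSE) pages, skipping ranges the e820 map does not
 * cover.
 */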
static void __init phys_pgd_init(pgd_t *pgd, unsigned long address, unsigned long end)
{
	long i, j;

	i = pgd_index(address);
	pgd = pgd + i;
	for (; i < PTRS_PER_PGD; pgd++, i++) {
		int map;
		unsigned long paddr, pmd_phys;
		pmd_t *pmd;

		paddr = (address & PML4_MASK) + i*PGDIR_SIZE;
		if (paddr >= end) {
			for (; i < PTRS_PER_PGD; i++, pgd++)
				set_pgd(pgd, __pgd(0));
			break;
		}

		if (!e820_mapped(paddr, paddr+PGDIR_SIZE, 0)) {
			set_pgd(pgd, __pgd(0));
			continue;
		}

		pmd = alloc_low_page(&map, &pmd_phys);
		set_pgd(pgd, __pgd(pmd_phys | _KERNPG_TABLE));
		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
			unsigned long pe;

			if (paddr >= end) {
				for (; j < PTRS_PER_PMD; j++, pmd++)
					set_pmd(pmd, __pmd(0));
				break;
			}
			pe = _PAGE_PSE | _KERNPG_TABLE | _PAGE_NX | _PAGE_GLOBAL | paddr;
			pe &= __supported_pte_mask;
			set_pmd(pmd, __pmd(pe));
		}
		unmap_low_page(map);
	}
	__flush_tlb();
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from
   the physical memory. To access them they are temporarily mapped. */
void __init init_memory_mapping(void)
{
	unsigned long adr;
	unsigned long end;
	unsigned long next;
	unsigned long pgds, pmds, tables;

	end = end_pfn_map << PAGE_SHIFT;

	/*
	 * Find space for the kernel direct mapping tables.
	 * Later we should allocate these tables in the local node of the
	 * memory mapped. Unfortunately this is done currently before the
	 * nodes are discovered.
	 */

	pgds = (end + PGDIR_SIZE - 1) >> PGDIR_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = round_up(pgds * 8, PAGE_SIZE) + round_up(pmds * 8, PAGE_SIZE);

	/* The direct mapping tables must currently fit below the kernel in
	   the first MB, because until bootmem is initialised we have no way
	   to tell the later passes not to reuse that memory. */
	/* Should limit MAXMEM for this */
	table_start = find_e820_area(/*0*/ 0x8000, __pa_symbol(&_text), tables);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;

	end += __PAGE_OFFSET; /* turn virtual */

	for (adr = PAGE_OFFSET; adr < end; adr = next) {
		int map;
		unsigned long pgd_phys;
		pgd_t *pgd = alloc_low_page(&map, &pgd_phys);
		next = adr + PML4_SIZE;
		if (next > end)
			next = end;

		phys_pgd_init(pgd, adr-PAGE_OFFSET, next-PAGE_OFFSET);
		set_pml4(init_level4_pgt + pml4_index(adr),
			 mk_kernel_pml4(pgd_phys, KERNPG_TABLE));
		unmap_low_page(map);
	}
	asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
	__flush_tlb_all();
	printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", end,
	       table_start << PAGE_SHIFT,
	       table_end << PAGE_SHIFT);
}

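/*
 * Clear the lowest PML4 entry (the low identity mapping) in each CPU's
 * level4 page table.  The low mappings are only needed for the
 * protected-mode entry during boot (see the comment in mem_init()), so
 * they are removed once that has happened.
 */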
void __init zap_low_mappings(void)
{
	int i;
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_pda[i].level4_pgt)
			cpu_pda[i].level4_pgt[0] = 0;
	}

	flush_tlb_all();
}

#ifndef CONFIG_DISCONTIGMEM
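/*
 * Set up the zone sizes (DMA below MAX_DMA_ADDRESS, NORMAL above) and
 * initialise the free area lists.  All memory is directly mapped, so
 * there is no HIGHMEM zone on x86-64.
 */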
void __init paging_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
	unsigned int max_dma;

	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
	if (end_pfn < max_dma)
		zones_size[ZONE_DMA] = end_pfn;
	else {
		zones_size[ZONE_DMA] = max_dma;
		zones_size[ZONE_NORMAL] = end_pfn - max_dma;
	}
	free_area_init(zones_size);
}

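/*
 * Return 1 if the given page frame number falls inside a usable
 * (E820_RAM) region of the BIOS e820 map, 0 otherwise.
 */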
static inline int page_is_ram(unsigned long pagenr)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long addr, end;

		if (e820.map[i].type != E820_RAM)	/* not usable memory */
			continue;
		/*
		 * !!!FIXME!!! Some BIOSen report areas as RAM that
		 * are not. Notably the 640->1Mb area. We need a sanity
		 * check here.
		 */
		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
		if ((pagenr >= addr) && (pagenr < end))
			return 1;
	}
	return 0;
}
#endif

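/*
 * Final memory setup: hand all bootmem pages over to the page allocator,
 * count the reserved RAM pages, and print the memory layout summary.
 */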
void __init mem_init(void)
{
	unsigned long codesize, reservedpages, datasize, initsize;
	unsigned long tmp;

	max_mapnr = end_pfn;
	num_physpages = end_pfn; /* XXX not true because of holes */
	high_memory = (void *) __va(end_pfn << PAGE_SHIFT);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_DISCONTIGMEM
	totalram_pages += numa_free_all_bootmem();
	tmp = 0;
	/* should count reserved pages here for all nodes */
#else
	if (!mem_map) BUG();

	totalram_pages += free_all_bootmem();

	for (tmp = 0; tmp < end_pfn; tmp++)
		/*
		 * Only count reserved RAM pages
		 */
		if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
			reservedpages++;
#endif

	after_bootmem = 1;

	codesize = (unsigned long) &_etext - (unsigned long) &_text;
	datasize = (unsigned long) &_edata - (unsigned long) &_etext;
	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
	       max_mapnr << (PAGE_SHIFT-10),
	       codesize >> 10,
	       reservedpages << (PAGE_SHIFT-10),
	       datasize >> 10,
	       initsize >> 10);

	/*
	 * Subtle. SMP is doing its boot stuff late (because it has to
	 * fork idle threads) - but it also needs low mappings for the
	 * protected-mode entry to work. We zap these entries only after
	 * the WP-bit has been tested.
	 */
#ifndef CONFIG_SMP
	zap_low_mappings();
#endif
}

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
	unsigned long end = address + size;

	BUG_ON(address & ~LARGE_PAGE_MASK);
	BUG_ON(size & ~LARGE_PAGE_MASK);

	for (; address < end; address += LARGE_PAGE_SIZE) {
		pgd_t *pgd = pgd_offset_k(address);
		pmd_t *pmd;
		if (!pgd || pgd_none(*pgd))
			continue;
		pmd = pmd_offset(pgd, address);
		if (!pmd || pmd_none(*pmd))
			continue;
		if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
			/* Could handle this, but it should not happen currently. */
			printk(KERN_ERR
			       "clear_kernel_mapping: mapping has been split. will leak memory\n");
			pmd_ERROR(*pmd);
		}
		set_pmd(pmd, __pmd(0));
	}
	__flush_tlb_all();
}

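/*
 * Free the pages between __init_begin and __init_end (code and data only
 * needed during initialisation) back to the page allocator, poisoning
 * them first when CONFIG_INIT_DEBUG is set.
 */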
void free_initmem(void)
{
	void *addr;

	addr = (&__init_begin);
	for (; addr < (void *)(&__init_end); addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		set_page_count(virt_to_page(addr), 1);
#ifdef CONFIG_INIT_DEBUG
		memset((void *)((unsigned long)addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
#endif
		free_page((unsigned long)addr);
		totalram_pages++;
	}
	printk("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
}

#ifdef CONFIG_BLK_DEV_INITRD
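/*
 * Free the pages holding the initial ramdisk, unless the range overlaps
 * the kernel image (start below _end), in which case it is left alone.
 */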
void free_initrd_mem(unsigned long start, unsigned long end)
{
	if (start < (unsigned long)&_end)
		return;
	printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
	for (; start < end; start += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(start));
		set_page_count(virt_to_page(start), 1);
		free_page(start);
		totalram_pages++;
	}
}
#endif

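/*
 * Fill in the memory related fields of a struct sysinfo.  There is no
 * highmem on x86-64, so the high memory counters are always 0.
 */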
void si_meminfo(struct sysinfo *val)
{
	val->totalram = totalram_pages;
	val->sharedram = 0;
	val->freeram = nr_free_pages();
	val->bufferram = atomic_read(&buffermem_pages);
	val->totalhigh = 0;
	val->freehigh = nr_free_highpages();
	val->mem_unit = PAGE_SIZE;
	return;
}

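/*
 * NUMA-aware wrappers around the bootmem allocator: on CONFIG_DISCONTIGMEM
 * kernels the reservation or free is directed to the node owning the
 * physical address, otherwise the plain bootmem interfaces are used.
 */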
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_DISCONTIGMEM
	reserve_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
#else
	reserve_bootmem(phys, len);
#endif
}

void free_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_DISCONTIGMEM
	free_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
#else
	free_bootmem(phys, len);
#endif
}