/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002  Andi Kleen <ak@suse.de>
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/blk.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/pda.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>

mmu_gather_t mmu_gathers[NR_CPUS];

static unsigned long totalram_pages;

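/*
 * Trim the per-CPU page table quicklists back towards the @low watermark
 * once they have grown past @high.  Returns the number of pages freed.
 */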
int do_check_pgt_cache(int low, int high)
{
	int freed = 0;
	if (read_pda(pgtable_cache_sz) > high) {
		do {
			if (read_pda(pgd_quick)) {
				pgd_free_slow(pgd_alloc_one_fast());
				freed++;
			}
			if (read_pda(pmd_quick)) {
				pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
				freed++;
			}
			if (read_pda(pte_quick)) {
				pte_free_slow(pte_alloc_one_fast(NULL, 0));
				freed++;
			}
		} while (read_pda(pgtable_cache_sz) > low);
	}
	return freed;
}

#ifndef CONFIG_DISCONTIGMEM
/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical memory, so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */

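/* Dump a summary of memory usage (called e.g. from the SysRq-m handler). */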
void show_mem(void)
{
	int i, total = 0, reserved = 0;
	int shared = 0, cached = 0;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap:       %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
	i = max_mapnr;
	while (i-- > 0) {
		total++;
		if (PageReserved(mem_map+i))
			reserved++;
		else if (PageSwapCache(mem_map+i))
			cached++;
		else if (page_count(mem_map+i))
			shared += page_count(mem_map+i) - 1;
	}
	printk("%d pages of RAM\n", total);
	printk("%d reserved pages\n", reserved);
	printk("%d pages shared\n", shared);
	printk("%d pages swap cached\n", cached);
	printk("%ld pages in page table cache\n", read_pda(pgtable_cache_sz));
	show_buffers();
}
#endif

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;

int after_bootmem;

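/*
 * Get a page for kernel page tables: from the bootmem allocator while it
 * is still available, from the page allocator afterwards.
 */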
static void *spp_getpage(void)
{
	void *ptr;
	if (after_bootmem)
		ptr = (void *) get_free_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_low_pages(PAGE_SIZE);
	if (!ptr)
		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem ? "after bootmem" : "");
	return ptr;
}

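/*
 * Install a mapping of @phys at @vaddr in the kernel page tables,
 * allocating intermediate pmd/pte pages as needed.  The PML4 entry must
 * already exist (it is set up in head.S for the fixmap range).
 */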
static void set_pte_phys(unsigned long vaddr,
			 unsigned long phys, pgprot_t prot)
{
	pml4_t *level4;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	level4 = pml4_offset_k(vaddr);
	if (pml4_none(*level4)) {
		printk("PML4 FIXMAP MISSING, it should be set up in head.S!\n");
		return;
	}
	pgd = level3_offset_k(level4, vaddr);
	if (pgd_none(*pgd)) {
		pmd = (pmd_t *) spp_getpage();
		set_pgd(pgd, __pgd(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		if (pmd != pmd_offset(pgd, 0)) {
			printk("PAGETABLE BUG #01!\n");
			return;
		}
	}
	pmd = pmd_offset(pgd, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
		if (pte != pte_offset(pmd, 0)) {
			printk("PAGETABLE BUG #02!\n");
			return;
		}
	}
	pte = pte_offset(pmd, vaddr);
	set_pte(pte, mk_pte_phys(phys, prot));

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, prot);
}

extern pmd_t temp_boot_pmds[];

unsigned long __initdata table_start, table_end;

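/*
 * Temporary 2MB mappings used to access newly allocated page table pages
 * while the direct mapping of physical memory is still being built.
 */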
static struct temp_map {
	pmd_t *pmd;
	void  *address;
	int    allocated;
} temp_mappings[] __initdata = {
	{ &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
	{ &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
	{}
};

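/*
 * Take the next free page from the table_start..table_end range and map
 * it through one of the temporary pmds so it can be written to.  Returns
 * the mapped virtual address; the temp slot index and the physical
 * address are passed back through @index and @phys.
 */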
static __init void *alloc_low_page(int *index, unsigned long *phys)
{
	struct temp_map *ti;
	int i;
	unsigned long pfn = table_end++, paddr;
	void *adr;

	if (table_end >= end_pfn_map)
		panic("alloc_low_page: ran out of page mappings");
	for (i = 0; temp_mappings[i].allocated; i++) {
		if (!temp_mappings[i].pmd)
			panic("alloc_low_page: ran out of temp mappings");
	}
	ti = &temp_mappings[i];
	paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
	set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
	ti->allocated = 1;
	__flush_tlb();
	adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
	*index = i;
	*phys  = pfn * PAGE_SIZE;
	return adr;
}

static __init void unmap_low_page(int i)
{
	struct temp_map *ti = &temp_mappings[i];
	set_pmd(ti->pmd, __pmd(0));
	ti->allocated = 0;
}

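/*
 * Fill one third-level page table with the direct mapping for the
 * physical range [address, end), using 2MB pages at the pmd level.
 * Ranges not covered by the e820 map are left unmapped.
 */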
static void __init phys_pgd_init(pgd_t *pgd, unsigned long address, unsigned long end)
{
	long i, j;

	i = pgd_index(address);
	pgd = pgd + i;
	for (; i < PTRS_PER_PGD; pgd++, i++) {
		int map;
		unsigned long paddr, pmd_phys;
		pmd_t *pmd;

		paddr = (address & PML4_MASK) + i*PGDIR_SIZE;
		if (paddr >= end) {
			for (; i < PTRS_PER_PGD; i++, pgd++)
				set_pgd(pgd, __pgd(0));
			break;
		}

		if (!e820_mapped(paddr, paddr+PGDIR_SIZE, 0)) {
			set_pgd(pgd, __pgd(0));
			continue;
		}

		pmd = alloc_low_page(&map, &pmd_phys);
		set_pgd(pgd, __pgd(pmd_phys | _KERNPG_TABLE));
		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
			unsigned long pe;

			if (paddr >= end) {
				for (; j < PTRS_PER_PMD; j++, pmd++)
					set_pmd(pmd, __pmd(0));
				break;
			}
			pe = _PAGE_PSE | _KERNPG_TABLE | _PAGE_NX | _PAGE_GLOBAL | paddr;
			pe &= __supported_pte_mask;
			set_pmd(pmd, __pmd(pe));
		}
		unmap_low_page(map);
	}
	__flush_tlb();
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
void __init init_memory_mapping(void)
{
	unsigned long adr;
	unsigned long end;
	unsigned long next;
	unsigned long pgds, pmds, tables;

	end = end_pfn_map << PAGE_SHIFT;

	/*
	 * Find space for the kernel direct mapping tables.
	 * Later we should allocate these tables in the local node of the memory
	 * mapped.  Unfortunately this is done currently before the nodes are
	 * discovered.
	 */

	pgds = (end + PGDIR_SIZE - 1) >> PGDIR_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = round_up(pgds*8, PAGE_SIZE) + round_up(pmds * 8, PAGE_SIZE);

	/* The direct mapping tables must currently fit below the kernel in the
	   first MB of memory, because we have no way to tell later passes not
	   to reuse that memory until bootmem is initialised. */
	/* Should limit MAXMEM for this */
	table_start = find_e820_area(/*0*/ 0x8000, __pa_symbol(&_text), tables);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;

	end += __PAGE_OFFSET; /* turn virtual */

	for (adr = PAGE_OFFSET; adr < end; adr = next) {
		int map;
		unsigned long pgd_phys;
		pgd_t *pgd = alloc_low_page(&map, &pgd_phys);
		next = adr + PML4_SIZE;
		if (next > end)
			next = end;

		phys_pgd_init(pgd, adr-PAGE_OFFSET, next-PAGE_OFFSET);
		set_pml4(init_level4_pgt + pml4_index(adr),
			 mk_kernel_pml4(pgd_phys, KERNPG_TABLE));
		unmap_low_page(map);
	}
	asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
	__flush_tlb_all();
	printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", end,
	       table_start<<PAGE_SHIFT,
	       table_end<<PAGE_SHIFT);
}

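/*
 * Remove the low identity mappings used during early boot by clearing
 * the first PML4 entry in every CPU's boot page table.
 */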
void __init zap_low_mappings(void)
{
	int i;
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_pda[i].level4_pgt)
			cpu_pda[i].level4_pgt[0] = 0;
	}

	flush_tlb_all();
}

#ifndef CONFIG_DISCONTIGMEM
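/*
 * Set up the memory zones: ZONE_DMA below MAX_DMA_ADDRESS and
 * ZONE_NORMAL for everything above it (there is no highmem on x86-64).
 */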
void __init paging_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
	unsigned int max_dma;

	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
	if (end_pfn < max_dma)
		zones_size[ZONE_DMA] = end_pfn;
	else {
		zones_size[ZONE_DMA] = max_dma;
		zones_size[ZONE_NORMAL] = end_pfn - max_dma;
	}
	free_area_init(zones_size);
}

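/* Return 1 if the given pfn lies in a usable (E820_RAM) region. */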
static inline int page_is_ram(unsigned long pagenr)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long addr, end;

		if (e820.map[i].type != E820_RAM)	/* not usable memory */
			continue;
		/*
		 *	!!!FIXME!!! Some BIOSen report areas as RAM that
		 *	are not. Notably the 640->1Mb area. We need a sanity
		 *	check here.
		 */
		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
		if ((pagenr >= addr) && (pagenr < end))
			return 1;
	}
	return 0;
}
#endif

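/*
 * Late memory setup: hand the bootmem pages over to the page allocator,
 * count the reserved pages and print the memory banner.
 */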
void __init mem_init(void)
{
	unsigned long codesize, reservedpages, datasize, initsize;
	unsigned long tmp;

	max_mapnr = end_pfn;
	num_physpages = end_pfn; /* XXX not true because of holes */
	high_memory = (void *) __va(end_pfn << PAGE_SHIFT);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_DISCONTIGMEM
	totalram_pages += numa_free_all_bootmem();
	tmp = 0;
	/* should count reserved pages here for all nodes */
#else
	if (!mem_map)
		BUG();

	totalram_pages += free_all_bootmem();

	for (tmp = 0; tmp < end_pfn; tmp++)
		/*
		 * Only count reserved RAM pages
		 */
		if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
			reservedpages++;
#endif

	after_bootmem = 1;

	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		max_mapnr << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);

	/*
	 * Subtle. SMP is doing its boot stuff late (because it has to
	 * fork idle threads) - but it also needs low mappings for the
	 * protected-mode entry to work. We zap these entries only after
	 * the WP-bit has been tested.
	 */
#ifndef CONFIG_SMP
	zap_low_mappings();
#endif
}

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
	unsigned long end = address + size;

	BUG_ON(address & ~LARGE_PAGE_MASK);
	BUG_ON(size & ~LARGE_PAGE_MASK);

	for (; address < end; address += LARGE_PAGE_SIZE) {
		pgd_t *pgd = pgd_offset_k(address);
		pmd_t *pmd;
		if (!pgd || pgd_none(*pgd))
			continue;
		pmd = pmd_offset(pgd, address);
		if (!pmd || pmd_none(*pmd))
			continue;
		if (!(pmd_val(*pmd) & _PAGE_PSE)) {
			/* Could handle this, but it should not happen currently. */
			printk(KERN_ERR
		"clear_kernel_mapping: mapping has been split. will leak memory\n");
			pmd_ERROR(*pmd);
		}
		set_pmd(pmd, __pmd(0));
	}
	__flush_tlb_all();
}

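/*
 * Free the pages between __init_begin and __init_end now that the
 * __init code and data are no longer needed.
 */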
void free_initmem(void)
{
	void *addr;

	addr = (&__init_begin);
	for (; addr < (void *)(&__init_end); addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		set_page_count(virt_to_page(addr), 1);
#ifdef CONFIG_INIT_DEBUG
		memset((void *)((unsigned long)addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
#endif
		free_page((unsigned long)addr);
		totalram_pages++;
	}
	printk("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
}

#ifdef CONFIG_BLK_DEV_INITRD
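/* Release the pages that held the initial ramdisk image. */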
void free_initrd_mem(unsigned long start, unsigned long end)
{
	if (start < (unsigned long)&_end)
		return;
	printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
	for (; start < end; start += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(start));
		set_page_count(virt_to_page(start), 1);
		free_page(start);
		totalram_pages++;
	}
}
#endif

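/* Report memory statistics, e.g. for the sysinfo(2) system call. */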
void si_meminfo(struct sysinfo *val)
{
	val->totalram = totalram_pages;
	val->sharedram = 0;
	val->freeram = nr_free_pages();
	val->bufferram = atomic_read(&buffermem_pages);
	val->totalhigh = 0;
	val->freehigh = nr_free_highpages();
	val->mem_unit = PAGE_SIZE;
	return;
}

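/*
 * Bootmem reserve/free wrappers that pick the right node when
 * CONFIG_DISCONTIGMEM is enabled.
 */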
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_DISCONTIGMEM
	reserve_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
#else
	reserve_bootmem(phys, len);
#endif
}


void free_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_DISCONTIGMEM
	free_bootmem_node(NODE_DATA(phys_to_nid(phys)), phys, len);
#else
	free_bootmem(phys, len);
#endif
}