1 /*
2  * Generic VM initialization for x86-64 NUMA setups.
3  * Copyright 2002 Andi Kleen, SuSE Labs.
4  * $Id: numa.c,v 1.6 2003/04/03 12:28:08 ak Exp $
5  */
6 #include <linux/kernel.h>
7 #include <linux/mm.h>
8 #include <linux/string.h>
9 #include <linux/init.h>
10 #include <linux/bootmem.h>
11 #include <linux/mmzone.h>
12 #include <linux/blk.h>
13 #include <asm/e820.h>
14 #include <asm/proto.h>
15 #include <asm/dma.h>
16 
17 #undef Dprintk
18 #define Dprintk(...)
19 
20 plat_pg_data_t *plat_node_data[MAXNODE];
21 bootmem_data_t plat_node_bdata[MAX_NUMNODES];
22 
23 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
24 
25 static int numa_off __initdata;
26 
27 unsigned long nodes_present;
28 int maxnode;
29 
30 /* Initialize bootmem allocator for a node */
setup_node_bootmem(int nodeid,unsigned long start,unsigned long end)31 void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
32 {
33 	unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
34 	unsigned long nodedata_phys;
35 	const int pgdat_size = round_up(sizeof(plat_pg_data_t), PAGE_SIZE);
36 
37 	start = round_up(start, ZONE_ALIGN);
38 
39 	printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
40 
41 	start_pfn = start >> PAGE_SHIFT;
42 	end_pfn = end >> PAGE_SHIFT;
43 
44 	nodedata_phys = find_e820_area(start, end, pgdat_size);
45 	if (nodedata_phys == -1L)
46 		panic("Cannot find memory pgdat in node %d\n", nodeid);
47 
48 	Dprintk("nodedata_phys %lx\n", nodedata_phys);
49 
50 	PLAT_NODE_DATA(nodeid) = phys_to_virt(nodedata_phys);
51 	memset(PLAT_NODE_DATA(nodeid), 0, sizeof(plat_pg_data_t));
52 	NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
53 
54 	/* Find a place for the bootmem map */
55 	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
56 	bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
57 	bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT);
58 	if (bootmap_start == -1L)
59 		panic("Not enough continuous space for bootmap on node %d", nodeid);
60 	Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
61 
62 	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
63 					 bootmap_start >> PAGE_SHIFT,
64 					 start_pfn, end_pfn);
65 
66 	e820_bootmem_free(NODE_DATA(nodeid), start, end);
67 
68 	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
69 	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
70 
71 	PLAT_NODE_DATA(nodeid)->start_pfn = start_pfn;
72 	PLAT_NODE_DATA(nodeid)->end_pfn = end_pfn;
73 
74 	if (nodeid > maxnode)
75 		maxnode = nodeid;
76 	nodes_present |= (1UL << nodeid);
77 }
78 
79 /* Initialize final allocator for a zone */
setup_node_zones(int nodeid)80 void __init setup_node_zones(int nodeid)
81 {
82 	unsigned long start_pfn, end_pfn;
83 	unsigned long zones[MAX_NR_ZONES];
84 	unsigned long dma_end_pfn;
85 	unsigned long lmax_mapnr;
86 
87 	memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES);
88 
89 	start_pfn = PLAT_NODE_DATA(nodeid)->start_pfn;
90 	end_pfn = PLAT_NODE_DATA(nodeid)->end_pfn;
91 
92 	printk("setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
93 
94 	/* All nodes > 0 have a zero length zone DMA */
95 	dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT;
96 	if (start_pfn < dma_end_pfn) {
97 		zones[ZONE_DMA] = dma_end_pfn - start_pfn;
98 		zones[ZONE_NORMAL] = end_pfn - dma_end_pfn;
99 	} else {
100 		zones[ZONE_NORMAL] = end_pfn - start_pfn;
101 	}
102 
103 	free_area_init_node(nodeid, NODE_DATA(nodeid), NULL, zones,
104 			    start_pfn<<PAGE_SHIFT, NULL);
105 	lmax_mapnr = PLAT_NODE_DATA_STARTNR(nodeid) + PLAT_NODE_DATA_SIZE(nodeid);
106 	if (lmax_mapnr > max_mapnr)
107 		max_mapnr = lmax_mapnr;
108 }
109 
110 int fake_node;
111 
numa_initmem_init(unsigned long start_pfn,unsigned long end_pfn)112 int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
113 {
114 #ifdef CONFIG_K8_NUMA
115 	if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
116 		return 0;
117 #endif
118 	printk(KERN_INFO "%s\n",
119 	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
120 
121 	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
122 	       start_pfn << PAGE_SHIFT,
123 	       end_pfn << PAGE_SHIFT);
124 	/* setup dummy node covering all memory */
125 	fake_node = 1;
126 	memnode_shift = 63;
127 	memnodemap[0] = 0;
128 	setup_node_bootmem(0, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
129 	return -1;
130 }
131 
/*
 * Iterate (x) over every node id in 0..maxnode whose bit is set in
 * nodes_present.  The statement following the macro is the loop body.
 * (x) is evaluated several times, so pass a plain variable.
 *
 * The presence test is written as "if (!present) { } else" so that the
 * macro already owns its else branch.  With a bare "if (present)" an else
 * written after the macro's body would silently attach to the hidden if
 * instead of to the caller's enclosing if.
 */
#define for_all_nodes(x) \
	for ((x) = 0; (x) <= maxnode; (x)++) \
		if (!((1UL << (x)) & nodes_present)) { } else
134 
numa_free_all_bootmem(void)135 unsigned long __init numa_free_all_bootmem(void)
136 {
137 	int i;
138 	unsigned long pages = 0;
139 	for_all_nodes(i) {
140 		pages += free_all_bootmem_node(NODE_DATA(i));
141 	}
142 	return pages;
143 }
144 
paging_init(void)145 void __init paging_init(void)
146 {
147 	int i;
148 	for_all_nodes(i) {
149 		setup_node_zones(i);
150 	}
151 }
152 
show_mem(void)153 void show_mem(void)
154 {
155 	long i,free = 0,total = 0,reserved = 0;
156 	long shared = 0, cached = 0;
157 	int nid;
158 
159 	printk("\nMem-info:\n");
160 	show_free_areas();
161 	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
162 	for_all_nodes (nid) {
163 		mem_map_t * lmem_map = NODE_MEM_MAP(nid);
164 		i = PLAT_NODE_DATA_SIZE(nid);
165 		while (i-- > 0) {
166 			total++;
167 			if (PageReserved(lmem_map+i))
168 				reserved++;
169 			else if (PageSwapCache(lmem_map+i))
170 				cached++;
171 			else if (!page_count(lmem_map+i))
172 				free++;
173 			else
174 				shared += atomic_read(&lmem_map[i].count) - 1;
175 		}
176 	}
177 	printk("%ld pages of RAM\n",total);
178 	printk("%ld free pages\n",free);
179 	printk("%ld reserved pages\n",reserved);
180 	printk("%ld pages shared\n",shared);
181 	printk("%ld pages swap cached\n",cached);
182 	show_buffers();
183 }
184 
185 /* [numa=off] */
numa_setup(char * opt)186 __init int numa_setup(char *opt)
187 {
188 	if (!strncmp(opt,"off",3))
189 		numa_off = 1;
190 	return 1;
191 }
192 
193 __setup("numa=", numa_setup);
194 
195