1 /*
2  * Hypervisor-assisted dump
3  *
4  * Linas Vepstas, Manish Ahuja 2008
5  * Copyright 2008 IBM Corp.
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  */
13 
14 #include <linux/gfp.h>
15 #include <linux/init.h>
16 #include <linux/kobject.h>
17 #include <linux/mm.h>
18 #include <linux/of.h>
19 #include <linux/pfn.h>
20 #include <linux/swap.h>
21 #include <linux/sysfs.h>
22 
23 #include <asm/page.h>
24 #include <asm/phyp_dump.h>
25 #include <asm/machdep.h>
26 #include <asm/prom.h>
27 #include <asm/rtas.h>
28 
29 /* Variables, used to communicate data between early boot and late boot */
30 static struct phyp_dump phyp_dump_vars;
31 struct phyp_dump *phyp_dump_info = &phyp_dump_vars;
32 
33 static int ibm_configure_kernel_dump;
34 /* ------------------------------------------------- */
35 /* RTAS interfaces to declare the dump regions */
36 
37 struct dump_section {
38 	u32 dump_flags;
39 	u16 source_type;
40 	u16 error_flags;
41 	u64 source_address;
42 	u64 source_length;
43 	u64 length_copied;
44 	u64 destination_address;
45 };
46 
47 struct phyp_dump_header {
48 	u32 version;
49 	u16 num_of_sections;
50 	u16 status;
51 
52 	u32 first_offset_section;
53 	u32 dump_disk_section;
54 	u64 block_num_dd;
55 	u64 num_of_blocks_dd;
56 	u32 offset_dd;
57 	u32 maxtime_to_auto;
58 	/* No dump disk path string used */
59 
60 	struct dump_section cpu_data;
61 	struct dump_section hpte_data;
62 	struct dump_section kernel_data;
63 };
64 
65 /* The dump header *must be* in low memory, so .bss it */
66 static struct phyp_dump_header phdr;
67 
68 #define NUM_DUMP_SECTIONS	3
69 #define DUMP_HEADER_VERSION	0x1
70 #define DUMP_REQUEST_FLAG	0x1
71 #define DUMP_SOURCE_CPU		0x0001
72 #define DUMP_SOURCE_HPTE	0x0002
73 #define DUMP_SOURCE_RMO		0x0011
74 #define DUMP_ERROR_FLAG		0x2000
75 #define DUMP_TRIGGERED		0x4000
76 #define DUMP_PERFORMED		0x8000
77 
78 
79 /**
80  * init_dump_header() - initialize the header declaring a dump
81  * Returns: length of dump save area.
82  *
83  * When the hypervisor saves crashed state, it needs to put
84  * it somewhere. The dump header tells the hypervisor where
85  * the data can be saved.
86  */
init_dump_header(struct phyp_dump_header * ph)87 static unsigned long init_dump_header(struct phyp_dump_header *ph)
88 {
89 	unsigned long addr_offset = 0;
90 
91 	/* Set up the dump header */
92 	ph->version = DUMP_HEADER_VERSION;
93 	ph->num_of_sections = NUM_DUMP_SECTIONS;
94 	ph->status = 0;
95 
96 	ph->first_offset_section =
97 		(u32)offsetof(struct phyp_dump_header, cpu_data);
98 	ph->dump_disk_section = 0;
99 	ph->block_num_dd = 0;
100 	ph->num_of_blocks_dd = 0;
101 	ph->offset_dd = 0;
102 
103 	ph->maxtime_to_auto = 0; /* disabled */
104 
105 	/* The first two sections are mandatory */
106 	ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
107 	ph->cpu_data.source_type = DUMP_SOURCE_CPU;
108 	ph->cpu_data.source_address = 0;
109 	ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
110 	ph->cpu_data.destination_address = addr_offset;
111 	addr_offset += phyp_dump_info->cpu_state_size;
112 
113 	ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
114 	ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
115 	ph->hpte_data.source_address = 0;
116 	ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
117 	ph->hpte_data.destination_address = addr_offset;
118 	addr_offset += phyp_dump_info->hpte_region_size;
119 
120 	/* This section describes the low kernel region */
121 	ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
122 	ph->kernel_data.source_type = DUMP_SOURCE_RMO;
123 	ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
124 	ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
125 	ph->kernel_data.destination_address = addr_offset;
126 	addr_offset += ph->kernel_data.source_length;
127 
128 	return addr_offset;
129 }
130 
#ifdef DEBUG
/*
 * Print the fields of one dump section.  @name is the prefix put in
 * front of every line ("cpu", "HPTE", "SRSD").
 */
static void print_dump_section(const char *name,
			       const struct dump_section *sec)
{
	/* dump_flags is a u32: print it unsigned.  The u16 fields promote to int. */
	printk(KERN_INFO "%s dump_flags =%u\n", name, sec->dump_flags);
	printk(KERN_INFO "%s source_type =%d\n", name, sec->source_type);
	printk(KERN_INFO "%s error_flags =%d\n", name, sec->error_flags);
	printk(KERN_INFO "%s source_address =%llx\n", name,
		sec->source_address);
	printk(KERN_INFO "%s source_length =%llx\n", name,
		sec->source_length);
	printk(KERN_INFO "%s length_copied =%llx\n", name,
		sec->length_copied);
}
#endif

/*
 * print_dump_header() - debug dump of a dump header to the kernel log
 * @ph: header to print; NULL is accepted and ignored
 *
 * Compiles to an empty function unless DEBUG is defined.  The unsigned
 * header fields are printed with unsigned conversions so that large
 * values do not show up as negative numbers.
 */
static void print_dump_header(const struct phyp_dump_header *ph)
{
#ifdef DEBUG
	if (ph == NULL)
		return;

	printk(KERN_INFO "dump header:\n");
	printk(KERN_INFO "version = %u\n", ph->version);
	printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
	printk(KERN_INFO "Status = 0x%x\n", ph->status);

	/* No dump disk is used, so all of these should be 0 */
	printk(KERN_INFO "Offset to first section 0x%x\n",
		ph->first_offset_section);
	printk(KERN_INFO "dump disk sections should be zero\n");
	printk(KERN_INFO "dump disk section = %u\n", ph->dump_disk_section);
	printk(KERN_INFO "block num = %llu\n", ph->block_num_dd);
	printk(KERN_INFO "number of blocks = %llu\n", ph->num_of_blocks_dd);
	printk(KERN_INFO "dump disk offset = %u\n", ph->offset_dd);
	printk(KERN_INFO "Max auto time= %u\n", ph->maxtime_to_auto);

	/* The three section descriptors */
	printk(KERN_INFO " CPU AREA\n");
	print_dump_section("cpu", &ph->cpu_data);

	printk(KERN_INFO " HPTE AREA\n");
	print_dump_section("HPTE", &ph->hpte_data);

	printk(KERN_INFO " SRSD AREA\n");
	print_dump_section("SRSD", &ph->kernel_data);
#endif
}
188 
show_phyp_dump_active(struct kobject * kobj,struct kobj_attribute * attr,char * buf)189 static ssize_t show_phyp_dump_active(struct kobject *kobj,
190 			struct kobj_attribute *attr, char *buf)
191 {
192 
193 	/* create filesystem entry so kdump is phyp-dump aware */
194 	return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
195 }
196 
197 static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,
198 					show_phyp_dump_active,
199 					NULL);
200 
register_dump_area(struct phyp_dump_header * ph,unsigned long addr)201 static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
202 {
203 	int rc;
204 
205 	/* Add addr value if not initialized before */
206 	if (ph->cpu_data.destination_address == 0) {
207 		ph->cpu_data.destination_address += addr;
208 		ph->hpte_data.destination_address += addr;
209 		ph->kernel_data.destination_address += addr;
210 	}
211 
212 	/* ToDo Invalidate kdump and free memory range. */
213 
214 	do {
215 		rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
216 				1, ph, sizeof(struct phyp_dump_header));
217 	} while (rtas_busy_delay(rc));
218 
219 	if (rc) {
220 		printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
221 						"register\n", rc);
222 		print_dump_header(ph);
223 		return;
224 	}
225 
226 	rc = sysfs_create_file(kernel_kobj, &pdl.attr);
227 	if (rc)
228 		printk(KERN_ERR "phyp-dump: unable to create sysfs"
229 				" file (%d)\n", rc);
230 }
231 
232 static
invalidate_last_dump(struct phyp_dump_header * ph,unsigned long addr)233 void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
234 {
235 	int rc;
236 
237 	/* Add addr value if not initialized before */
238 	if (ph->cpu_data.destination_address == 0) {
239 		ph->cpu_data.destination_address += addr;
240 		ph->hpte_data.destination_address += addr;
241 		ph->kernel_data.destination_address += addr;
242 	}
243 
244 	do {
245 		rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
246 				2, ph, sizeof(struct phyp_dump_header));
247 	} while (rtas_busy_delay(rc));
248 
249 	if (rc) {
250 		printk(KERN_ERR "phyp-dump: unexpected error (%d) "
251 						"on invalidate\n", rc);
252 		print_dump_header(ph);
253 	}
254 }
255 
256 /* ------------------------------------------------- */
257 /**
258  * release_memory_range -- release memory previously memblock_reserved
259  * @start_pfn: starting physical frame number
260  * @nr_pages: number of pages to free.
261  *
262  * This routine will release memory that had been previously
263  * memblock_reserved in early boot. The released memory becomes
264  * available for genreal use.
265  */
release_memory_range(unsigned long start_pfn,unsigned long nr_pages)266 static void release_memory_range(unsigned long start_pfn,
267 			unsigned long nr_pages)
268 {
269 	struct page *rpage;
270 	unsigned long end_pfn;
271 	long i;
272 
273 	end_pfn = start_pfn + nr_pages;
274 
275 	for (i = start_pfn; i <= end_pfn; i++) {
276 		rpage = pfn_to_page(i);
277 		if (PageReserved(rpage)) {
278 			ClearPageReserved(rpage);
279 			init_page_count(rpage);
280 			__free_page(rpage);
281 			totalram_pages++;
282 		}
283 	}
284 }
285 
286 /**
287  * track_freed_range -- Counts the range being freed.
288  * Once the counter goes to zero, it re-registers dump for
289  * future use.
290  */
291 static void
track_freed_range(unsigned long addr,unsigned long length)292 track_freed_range(unsigned long addr, unsigned long length)
293 {
294 	static unsigned long scratch_area_size, reserved_area_size;
295 
296 	if (addr < phyp_dump_info->init_reserve_start)
297 		return;
298 
299 	if ((addr >= phyp_dump_info->init_reserve_start) &&
300 	    (addr <= phyp_dump_info->init_reserve_start +
301 	     phyp_dump_info->init_reserve_size))
302 		reserved_area_size += length;
303 
304 	if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
305 	    (addr <= phyp_dump_info->reserved_scratch_addr +
306 	     phyp_dump_info->reserved_scratch_size))
307 		scratch_area_size += length;
308 
309 	if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
310 	    (scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
311 
312 		invalidate_last_dump(&phdr,
313 				phyp_dump_info->reserved_scratch_addr);
314 		register_dump_area(&phdr,
315 				phyp_dump_info->reserved_scratch_addr);
316 	}
317 }
318 
319 /* ------------------------------------------------- */
320 /**
321  * sysfs_release_region -- sysfs interface to release memory range.
322  *
323  * Usage:
324  *   "echo <start addr> <length> > /sys/kernel/release_region"
325  *
326  * Example:
327  *   "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
328  *
329  * will release 256MB starting at 1GB.
330  */
store_release_region(struct kobject * kobj,struct kobj_attribute * attr,const char * buf,size_t count)331 static ssize_t store_release_region(struct kobject *kobj,
332 				struct kobj_attribute *attr,
333 				const char *buf, size_t count)
334 {
335 	unsigned long start_addr, length, end_addr;
336 	unsigned long start_pfn, nr_pages;
337 	ssize_t ret;
338 
339 	ret = sscanf(buf, "%lx %lx", &start_addr, &length);
340 	if (ret != 2)
341 		return -EINVAL;
342 
343 	track_freed_range(start_addr, length);
344 
345 	/* Range-check - don't free any reserved memory that
346 	 * wasn't reserved for phyp-dump */
347 	if (start_addr < phyp_dump_info->init_reserve_start)
348 		start_addr = phyp_dump_info->init_reserve_start;
349 
350 	end_addr = phyp_dump_info->init_reserve_start +
351 			phyp_dump_info->init_reserve_size;
352 	if (start_addr+length > end_addr)
353 		length = end_addr - start_addr;
354 
355 	/* Release the region of memory assed in by user */
356 	start_pfn = PFN_DOWN(start_addr);
357 	nr_pages = PFN_DOWN(length);
358 	release_memory_range(start_pfn, nr_pages);
359 
360 	return count;
361 }
362 
show_release_region(struct kobject * kobj,struct kobj_attribute * attr,char * buf)363 static ssize_t show_release_region(struct kobject *kobj,
364 			struct kobj_attribute *attr, char *buf)
365 {
366 	u64 second_addr_range;
367 
368 	/* total reserved size - start of scratch area */
369 	second_addr_range = phyp_dump_info->init_reserve_size -
370 				phyp_dump_info->reserved_scratch_size;
371 	return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"
372 			    " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",
373 		phdr.cpu_data.destination_address,
374 		phdr.cpu_data.length_copied,
375 		phdr.hpte_data.destination_address,
376 		phdr.hpte_data.length_copied,
377 		phdr.kernel_data.destination_address,
378 		phdr.kernel_data.length_copied,
379 		phyp_dump_info->init_reserve_start,
380 		second_addr_range);
381 }
382 
383 static struct kobj_attribute rr = __ATTR(release_region, 0600,
384 					show_release_region,
385 					store_release_region);
386 
phyp_dump_setup(void)387 static int __init phyp_dump_setup(void)
388 {
389 	struct device_node *rtas;
390 	const struct phyp_dump_header *dump_header = NULL;
391 	unsigned long dump_area_start;
392 	unsigned long dump_area_length;
393 	int header_len = 0;
394 	int rc;
395 
396 	/* If no memory was reserved in early boot, there is nothing to do */
397 	if (phyp_dump_info->init_reserve_size == 0)
398 		return 0;
399 
400 	/* Return if phyp dump not supported */
401 	if (!phyp_dump_info->phyp_dump_configured)
402 		return -ENOSYS;
403 
404 	/* Is there dump data waiting for us? If there isn't,
405 	 * then register a new dump area, and release all of
406 	 * the rest of the reserved ram.
407 	 *
408 	 * The /rtas/ibm,kernel-dump rtas node is present only
409 	 * if there is dump data waiting for us.
410 	 */
411 	rtas = of_find_node_by_path("/rtas");
412 	if (rtas) {
413 		dump_header = of_get_property(rtas, "ibm,kernel-dump",
414 						&header_len);
415 		of_node_put(rtas);
416 	}
417 
418 	ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");
419 
420 	print_dump_header(dump_header);
421 	dump_area_length = init_dump_header(&phdr);
422 	/* align down */
423 	dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;
424 
425 	if (dump_header == NULL) {
426 		register_dump_area(&phdr, dump_area_start);
427 		return 0;
428 	}
429 
430 	/* re-register the dump area, if old dump was invalid */
431 	if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {
432 		invalidate_last_dump(&phdr, dump_area_start);
433 		register_dump_area(&phdr, dump_area_start);
434 		return 0;
435 	}
436 
437 	if (dump_header) {
438 		phyp_dump_info->reserved_scratch_addr =
439 				dump_header->cpu_data.destination_address;
440 		phyp_dump_info->reserved_scratch_size =
441 				dump_header->cpu_data.source_length +
442 				dump_header->hpte_data.source_length +
443 				dump_header->kernel_data.source_length;
444 	}
445 
446 	/* Should we create a dump_subsys, analogous to s390/ipl.c ? */
447 	rc = sysfs_create_file(kernel_kobj, &rr.attr);
448 	if (rc)
449 		printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
450 									rc);
451 
452 	/* ToDo: re-register the dump area, for next time. */
453 	return 0;
454 }
455 machine_subsys_initcall(pseries, phyp_dump_setup);
456 
early_init_dt_scan_phyp_dump(unsigned long node,const char * uname,int depth,void * data)457 int __init early_init_dt_scan_phyp_dump(unsigned long node,
458 		const char *uname, int depth, void *data)
459 {
460 	const unsigned int *sizes;
461 
462 	phyp_dump_info->phyp_dump_configured = 0;
463 	phyp_dump_info->phyp_dump_is_active = 0;
464 
465 	if (depth != 1 || strcmp(uname, "rtas") != 0)
466 		return 0;
467 
468 	if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
469 		phyp_dump_info->phyp_dump_configured++;
470 
471 	if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
472 		phyp_dump_info->phyp_dump_is_active++;
473 
474 	sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
475 				    NULL);
476 	if (!sizes)
477 		return 0;
478 
479 	if (sizes[0] == 1)
480 		phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);
481 
482 	if (sizes[3] == 2)
483 		phyp_dump_info->hpte_region_size =
484 						*((unsigned long *)&sizes[4]);
485 	return 1;
486 }
487 
488 /* Look for phyp_dump= cmdline option */
early_phyp_dump_enabled(char * p)489 static int __init early_phyp_dump_enabled(char *p)
490 {
491 	phyp_dump_info->phyp_dump_at_boot = 1;
492 
493         if (!p)
494                 return 0;
495 
496         if (strncmp(p, "1", 1) == 0)
497 		phyp_dump_info->phyp_dump_at_boot = 1;
498         else if (strncmp(p, "0", 1) == 0)
499 		phyp_dump_info->phyp_dump_at_boot = 0;
500 
501         return 0;
502 }
503 early_param("phyp_dump", early_phyp_dump_enabled);
504 
505 /* Look for phyp_dump_reserve_size= cmdline option */
early_phyp_dump_reserve_size(char * p)506 static int __init early_phyp_dump_reserve_size(char *p)
507 {
508         if (p)
509 		phyp_dump_info->reserve_bootvar = memparse(p, &p);
510 
511         return 0;
512 }
513 early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);
514