1 /*
2  * Copyright (C) 2009 Matt Fleming <matt@console-pimps.org>
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * This is an implementation of a DWARF unwinder. Its main purpose is
9  * for generating stacktrace information. Based on the DWARF 3
10  * specification from http://www.dwarfstd.org.
11  *
12  * TODO:
13  *	- DWARF64 doesn't work.
14  *	- Registers with DWARF_VAL_OFFSET rules aren't handled properly.
15  */
16 
17 /* #define DEBUG */
18 #include <linux/kernel.h>
19 #include <linux/io.h>
20 #include <linux/list.h>
21 #include <linux/mempool.h>
22 #include <linux/mm.h>
23 #include <linux/elf.h>
24 #include <linux/ftrace.h>
25 #include <linux/module.h>
26 #include <linux/slab.h>
27 #include <asm/dwarf.h>
28 #include <asm/unwinder.h>
29 #include <asm/sections.h>
30 #include <asm/unaligned.h>
31 #include <asm/stacktrace.h>
32 
/* Reserve enough memory for two stack frames */
#define DWARF_FRAME_MIN_REQ	2
/* ... with 4 registers per frame. */
#define DWARF_REG_MIN_REQ	(DWARF_FRAME_MIN_REQ * 4)

/*
 * struct dwarf_frame objects are taken from and returned to
 * dwarf_frame_pool. NOTE(review): dwarf_frame_cachep is presumably the
 * slab cache backing that pool - the setup code is not in this part of
 * the file.
 */
static struct kmem_cache *dwarf_frame_cachep;
static mempool_t *dwarf_frame_pool;

/* Same arrangement for struct dwarf_reg objects. */
static struct kmem_cache *dwarf_reg_cachep;
static mempool_t *dwarf_reg_pool;

/* All known CIEs, ordered by cie_pointer; guarded by dwarf_cie_lock. */
static struct rb_root cie_root;
static DEFINE_SPINLOCK(dwarf_cie_lock);

/* All known FDEs, ordered by the address range they cover; guarded by dwarf_fde_lock. */
static struct rb_root fde_root;
static DEFINE_SPINLOCK(dwarf_fde_lock);

/* Last CIE returned by dwarf_lookup_cie(); accessed under dwarf_cie_lock. */
static struct dwarf_cie *cached_cie;

/* dwarf_unwind_stack() refuses to run while this is zero. */
static unsigned int dwarf_unwinder_ready;
53 
54 /**
55  *	dwarf_frame_alloc_reg - allocate memory for a DWARF register
56  *	@frame: the DWARF frame whose list of registers we insert on
57  *	@reg_num: the register number
58  *
59  *	Allocate space for, and initialise, a dwarf reg from
60  *	dwarf_reg_pool and insert it onto the (unsorted) linked-list of
61  *	dwarf registers for @frame.
62  *
63  *	Return the initialised DWARF reg.
64  */
dwarf_frame_alloc_reg(struct dwarf_frame * frame,unsigned int reg_num)65 static struct dwarf_reg *dwarf_frame_alloc_reg(struct dwarf_frame *frame,
66 					       unsigned int reg_num)
67 {
68 	struct dwarf_reg *reg;
69 
70 	reg = mempool_alloc(dwarf_reg_pool, GFP_ATOMIC);
71 	if (!reg) {
72 		printk(KERN_WARNING "Unable to allocate a DWARF register\n");
73 		/*
74 		 * Let's just bomb hard here, we have no way to
75 		 * gracefully recover.
76 		 */
77 		UNWINDER_BUG();
78 	}
79 
80 	reg->number = reg_num;
81 	reg->addr = 0;
82 	reg->flags = 0;
83 
84 	list_add(&reg->link, &frame->reg_list);
85 
86 	return reg;
87 }
88 
dwarf_frame_free_regs(struct dwarf_frame * frame)89 static void dwarf_frame_free_regs(struct dwarf_frame *frame)
90 {
91 	struct dwarf_reg *reg, *n;
92 
93 	list_for_each_entry_safe(reg, n, &frame->reg_list, link) {
94 		list_del(&reg->link);
95 		mempool_free(reg, dwarf_reg_pool);
96 	}
97 }
98 
99 /**
100  *	dwarf_frame_reg - return a DWARF register
101  *	@frame: the DWARF frame to search in for @reg_num
102  *	@reg_num: the register number to search for
103  *
104  *	Lookup and return the dwarf reg @reg_num for this frame. Return
105  *	NULL if @reg_num is an register invalid number.
106  */
dwarf_frame_reg(struct dwarf_frame * frame,unsigned int reg_num)107 static struct dwarf_reg *dwarf_frame_reg(struct dwarf_frame *frame,
108 					 unsigned int reg_num)
109 {
110 	struct dwarf_reg *reg;
111 
112 	list_for_each_entry(reg, &frame->reg_list, link) {
113 		if (reg->number == reg_num)
114 			return reg;
115 	}
116 
117 	return NULL;
118 }
119 
/**
 *	dwarf_read_addr - read an address-sized datum
 *	@src: source address of data
 *	@dst: destination address to store the data to
 *
 *	Copy one native address (an unsigned long) from @src to @dst.
 *	Neither pointer needs to be aligned, so both sides go through the
 *	unaligned accessors. Return the number of bytes read, i.e. the
 *	size of the datum.
 */
static inline int dwarf_read_addr(unsigned long *src, unsigned long *dst)
{
	/*
	 * Keep the full width of the datum: a u32 temporary would
	 * silently truncate wherever unsigned long is wider than 32 bits.
	 */
	unsigned long val = get_unaligned(src);

	put_unaligned(val, dst);

	/* Size of what was read, not the size of a pointer to it. */
	return sizeof(*src);
}
137 
/**
 *	dwarf_read_uleb128 - read unsigned LEB128 data
 *	@addr: the address where the ULEB128 data is stored
 *	@ret: address to store the result
 *
 *	Decode an unsigned LEB128 encoded datum. The algorithm is taken
 *	from Appendix C of the DWARF 3 spec. For information on the
 *	encodings refer to section "7.6 - Variable Length Data".
 *
 *	Every byte of the encoding is consumed, up to and including the
 *	first byte without the continuation bit. Payload bits that do
 *	not fit in an unsigned int are dropped rather than shifted out of
 *	range.
 *
 *	Return the number of bytes read.
 */
static inline unsigned long dwarf_read_uleb128(char *addr, unsigned int *ret)
{
	const unsigned int num_bits = 8 * sizeof(unsigned int);
	unsigned int result = 0;
	unsigned int shift = 0;
	unsigned long count = 0;
	unsigned char byte;

	do {
		/* The encoded data is ordinary memory; a plain load suffices. */
		byte = *(unsigned char *)addr;
		addr++;
		count++;

		/*
		 * Accumulate in unsigned arithmetic and stop shifting once
		 * the result is full: shifting by >= the type width (or
		 * into the sign bit of an int) is undefined behaviour.
		 */
		if (shift < num_bits) {
			result |= (unsigned int)(byte & 0x7f) << shift;
			shift += 7;
		}
	} while (byte & 0x80);

	*ret = result;

	return count;
}
174 
/**
 *	dwarf_read_leb128 - read signed LEB128 data
 *	@addr: the address of the LEB128 encoded data
 *	@ret: address to store the result
 *
 *	Decode signed LEB128 data. The algorithm is taken from Appendix
 *	C of the DWARF 3 spec.
 *
 *	Every byte of the encoding is consumed, up to and including the
 *	first byte without the continuation bit. The value is sign
 *	extended from bit 6 of the final byte when the encoding is
 *	shorter than an int.
 *
 *	Return the number of bytes read.
 */
static inline unsigned long dwarf_read_leb128(char *addr, int *ret)
{
	/* The number of bits in a signed integer. */
	const unsigned int num_bits = 8 * sizeof(int);
	unsigned int result = 0;
	unsigned int shift = 0;
	unsigned long count = 0;
	unsigned char byte;

	do {
		/* The encoded data is ordinary memory; a plain load suffices. */
		byte = *(unsigned char *)addr;
		addr++;
		count++;

		/*
		 * Build the value as unsigned so that no shift can overflow
		 * a signed int, and never shift by the full type width.
		 */
		if (shift < num_bits) {
			result |= (unsigned int)(byte & 0x7f) << shift;
			shift += 7;
		}
	} while (byte & 0x80);

	/* Sign extend; "-1 << shift" would left-shift a negative value. */
	if ((shift < num_bits) && (byte & 0x40))
		result |= ~0U << shift;

	*ret = (int)result;

	return count;
}
215 
216 /**
217  *	dwarf_read_encoded_value - return the decoded value at @addr
218  *	@addr: the address of the encoded value
219  *	@val: where to write the decoded value
220  *	@encoding: the encoding with which we can decode @addr
221  *
222  *	GCC emits encoded address in the .eh_frame FDE entries. Decode
223  *	the value at @addr using @encoding. The decoded value is written
224  *	to @val and the number of bytes read is returned.
225  */
dwarf_read_encoded_value(char * addr,unsigned long * val,char encoding)226 static int dwarf_read_encoded_value(char *addr, unsigned long *val,
227 				    char encoding)
228 {
229 	unsigned long decoded_addr = 0;
230 	int count = 0;
231 
232 	switch (encoding & 0x70) {
233 	case DW_EH_PE_absptr:
234 		break;
235 	case DW_EH_PE_pcrel:
236 		decoded_addr = (unsigned long)addr;
237 		break;
238 	default:
239 		pr_debug("encoding=0x%x\n", (encoding & 0x70));
240 		UNWINDER_BUG();
241 	}
242 
243 	if ((encoding & 0x07) == 0x00)
244 		encoding |= DW_EH_PE_udata4;
245 
246 	switch (encoding & 0x0f) {
247 	case DW_EH_PE_sdata4:
248 	case DW_EH_PE_udata4:
249 		count += 4;
250 		decoded_addr += get_unaligned((u32 *)addr);
251 		__raw_writel(decoded_addr, val);
252 		break;
253 	default:
254 		pr_debug("encoding=0x%x\n", encoding);
255 		UNWINDER_BUG();
256 	}
257 
258 	return count;
259 }
260 
261 /**
262  *	dwarf_entry_len - return the length of an FDE or CIE
263  *	@addr: the address of the entry
264  *	@len: the length of the entry
265  *
266  *	Read the initial_length field of the entry and store the size of
267  *	the entry in @len. We return the number of bytes read. Return a
268  *	count of 0 on error.
269  */
dwarf_entry_len(char * addr,unsigned long * len)270 static inline int dwarf_entry_len(char *addr, unsigned long *len)
271 {
272 	u32 initial_len;
273 	int count;
274 
275 	initial_len = get_unaligned((u32 *)addr);
276 	count = 4;
277 
278 	/*
279 	 * An initial length field value in the range DW_LEN_EXT_LO -
280 	 * DW_LEN_EXT_HI indicates an extension, and should not be
281 	 * interpreted as a length. The only extension that we currently
282 	 * understand is the use of DWARF64 addresses.
283 	 */
284 	if (initial_len >= DW_EXT_LO && initial_len <= DW_EXT_HI) {
285 		/*
286 		 * The 64-bit length field immediately follows the
287 		 * compulsory 32-bit length field.
288 		 */
289 		if (initial_len == DW_EXT_DWARF64) {
290 			*len = get_unaligned((u64 *)addr + 4);
291 			count = 12;
292 		} else {
293 			printk(KERN_WARNING "Unknown DWARF extension\n");
294 			count = 0;
295 		}
296 	} else
297 		*len = initial_len;
298 
299 	return count;
300 }
301 
302 /**
303  *	dwarf_lookup_cie - locate the cie
304  *	@cie_ptr: pointer to help with lookup
305  */
dwarf_lookup_cie(unsigned long cie_ptr)306 static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
307 {
308 	struct rb_node **rb_node = &cie_root.rb_node;
309 	struct dwarf_cie *cie = NULL;
310 	unsigned long flags;
311 
312 	spin_lock_irqsave(&dwarf_cie_lock, flags);
313 
314 	/*
315 	 * We've cached the last CIE we looked up because chances are
316 	 * that the FDE wants this CIE.
317 	 */
318 	if (cached_cie && cached_cie->cie_pointer == cie_ptr) {
319 		cie = cached_cie;
320 		goto out;
321 	}
322 
323 	while (*rb_node) {
324 		struct dwarf_cie *cie_tmp;
325 
326 		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
327 		BUG_ON(!cie_tmp);
328 
329 		if (cie_ptr == cie_tmp->cie_pointer) {
330 			cie = cie_tmp;
331 			cached_cie = cie_tmp;
332 			goto out;
333 		} else {
334 			if (cie_ptr < cie_tmp->cie_pointer)
335 				rb_node = &(*rb_node)->rb_left;
336 			else
337 				rb_node = &(*rb_node)->rb_right;
338 		}
339 	}
340 
341 out:
342 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
343 	return cie;
344 }
345 
346 /**
347  *	dwarf_lookup_fde - locate the FDE that covers pc
348  *	@pc: the program counter
349  */
dwarf_lookup_fde(unsigned long pc)350 struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
351 {
352 	struct rb_node **rb_node = &fde_root.rb_node;
353 	struct dwarf_fde *fde = NULL;
354 	unsigned long flags;
355 
356 	spin_lock_irqsave(&dwarf_fde_lock, flags);
357 
358 	while (*rb_node) {
359 		struct dwarf_fde *fde_tmp;
360 		unsigned long tmp_start, tmp_end;
361 
362 		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
363 		BUG_ON(!fde_tmp);
364 
365 		tmp_start = fde_tmp->initial_location;
366 		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
367 
368 		if (pc < tmp_start) {
369 			rb_node = &(*rb_node)->rb_left;
370 		} else {
371 			if (pc < tmp_end) {
372 				fde = fde_tmp;
373 				goto out;
374 			} else
375 				rb_node = &(*rb_node)->rb_right;
376 		}
377 	}
378 
379 out:
380 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
381 
382 	return fde;
383 }
384 
385 /**
386  *	dwarf_cfa_execute_insns - execute instructions to calculate a CFA
387  *	@insn_start: address of the first instruction
388  *	@insn_end: address of the last instruction
389  *	@cie: the CIE for this function
390  *	@fde: the FDE for this function
391  *	@frame: the instructions calculate the CFA for this frame
392  *	@pc: the program counter of the address we're interested in
393  *
394  *	Execute the Call Frame instruction sequence starting at
395  *	@insn_start and ending at @insn_end. The instructions describe
396  *	how to calculate the Canonical Frame Address of a stackframe.
397  *	Store the results in @frame.
398  */
dwarf_cfa_execute_insns(unsigned char * insn_start,unsigned char * insn_end,struct dwarf_cie * cie,struct dwarf_fde * fde,struct dwarf_frame * frame,unsigned long pc)399 static int dwarf_cfa_execute_insns(unsigned char *insn_start,
400 				   unsigned char *insn_end,
401 				   struct dwarf_cie *cie,
402 				   struct dwarf_fde *fde,
403 				   struct dwarf_frame *frame,
404 				   unsigned long pc)
405 {
406 	unsigned char insn;
407 	unsigned char *current_insn;
408 	unsigned int count, delta, reg, expr_len, offset;
409 	struct dwarf_reg *regp;
410 
411 	current_insn = insn_start;
412 
413 	while (current_insn < insn_end && frame->pc <= pc) {
414 		insn = __raw_readb(current_insn++);
415 
416 		/*
417 		 * Firstly, handle the opcodes that embed their operands
418 		 * in the instructions.
419 		 */
420 		switch (DW_CFA_opcode(insn)) {
421 		case DW_CFA_advance_loc:
422 			delta = DW_CFA_operand(insn);
423 			delta *= cie->code_alignment_factor;
424 			frame->pc += delta;
425 			continue;
426 			/* NOTREACHED */
427 		case DW_CFA_offset:
428 			reg = DW_CFA_operand(insn);
429 			count = dwarf_read_uleb128(current_insn, &offset);
430 			current_insn += count;
431 			offset *= cie->data_alignment_factor;
432 			regp = dwarf_frame_alloc_reg(frame, reg);
433 			regp->addr = offset;
434 			regp->flags |= DWARF_REG_OFFSET;
435 			continue;
436 			/* NOTREACHED */
437 		case DW_CFA_restore:
438 			reg = DW_CFA_operand(insn);
439 			continue;
440 			/* NOTREACHED */
441 		}
442 
443 		/*
444 		 * Secondly, handle the opcodes that don't embed their
445 		 * operands in the instruction.
446 		 */
447 		switch (insn) {
448 		case DW_CFA_nop:
449 			continue;
450 		case DW_CFA_advance_loc1:
451 			delta = *current_insn++;
452 			frame->pc += delta * cie->code_alignment_factor;
453 			break;
454 		case DW_CFA_advance_loc2:
455 			delta = get_unaligned((u16 *)current_insn);
456 			current_insn += 2;
457 			frame->pc += delta * cie->code_alignment_factor;
458 			break;
459 		case DW_CFA_advance_loc4:
460 			delta = get_unaligned((u32 *)current_insn);
461 			current_insn += 4;
462 			frame->pc += delta * cie->code_alignment_factor;
463 			break;
464 		case DW_CFA_offset_extended:
465 			count = dwarf_read_uleb128(current_insn, &reg);
466 			current_insn += count;
467 			count = dwarf_read_uleb128(current_insn, &offset);
468 			current_insn += count;
469 			offset *= cie->data_alignment_factor;
470 			break;
471 		case DW_CFA_restore_extended:
472 			count = dwarf_read_uleb128(current_insn, &reg);
473 			current_insn += count;
474 			break;
475 		case DW_CFA_undefined:
476 			count = dwarf_read_uleb128(current_insn, &reg);
477 			current_insn += count;
478 			regp = dwarf_frame_alloc_reg(frame, reg);
479 			regp->flags |= DWARF_UNDEFINED;
480 			break;
481 		case DW_CFA_def_cfa:
482 			count = dwarf_read_uleb128(current_insn,
483 						   &frame->cfa_register);
484 			current_insn += count;
485 			count = dwarf_read_uleb128(current_insn,
486 						   &frame->cfa_offset);
487 			current_insn += count;
488 
489 			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
490 			break;
491 		case DW_CFA_def_cfa_register:
492 			count = dwarf_read_uleb128(current_insn,
493 						   &frame->cfa_register);
494 			current_insn += count;
495 			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
496 			break;
497 		case DW_CFA_def_cfa_offset:
498 			count = dwarf_read_uleb128(current_insn, &offset);
499 			current_insn += count;
500 			frame->cfa_offset = offset;
501 			break;
502 		case DW_CFA_def_cfa_expression:
503 			count = dwarf_read_uleb128(current_insn, &expr_len);
504 			current_insn += count;
505 
506 			frame->cfa_expr = current_insn;
507 			frame->cfa_expr_len = expr_len;
508 			current_insn += expr_len;
509 
510 			frame->flags |= DWARF_FRAME_CFA_REG_EXP;
511 			break;
512 		case DW_CFA_offset_extended_sf:
513 			count = dwarf_read_uleb128(current_insn, &reg);
514 			current_insn += count;
515 			count = dwarf_read_leb128(current_insn, &offset);
516 			current_insn += count;
517 			offset *= cie->data_alignment_factor;
518 			regp = dwarf_frame_alloc_reg(frame, reg);
519 			regp->flags |= DWARF_REG_OFFSET;
520 			regp->addr = offset;
521 			break;
522 		case DW_CFA_val_offset:
523 			count = dwarf_read_uleb128(current_insn, &reg);
524 			current_insn += count;
525 			count = dwarf_read_leb128(current_insn, &offset);
526 			offset *= cie->data_alignment_factor;
527 			regp = dwarf_frame_alloc_reg(frame, reg);
528 			regp->flags |= DWARF_VAL_OFFSET;
529 			regp->addr = offset;
530 			break;
531 		case DW_CFA_GNU_args_size:
532 			count = dwarf_read_uleb128(current_insn, &offset);
533 			current_insn += count;
534 			break;
535 		case DW_CFA_GNU_negative_offset_extended:
536 			count = dwarf_read_uleb128(current_insn, &reg);
537 			current_insn += count;
538 			count = dwarf_read_uleb128(current_insn, &offset);
539 			offset *= cie->data_alignment_factor;
540 
541 			regp = dwarf_frame_alloc_reg(frame, reg);
542 			regp->flags |= DWARF_REG_OFFSET;
543 			regp->addr = -offset;
544 			break;
545 		default:
546 			pr_debug("unhandled DWARF instruction 0x%x\n", insn);
547 			UNWINDER_BUG();
548 			break;
549 		}
550 	}
551 
552 	return 0;
553 }
554 
555 /**
556  *	dwarf_free_frame - free the memory allocated for @frame
557  *	@frame: the frame to free
558  */
dwarf_free_frame(struct dwarf_frame * frame)559 void dwarf_free_frame(struct dwarf_frame *frame)
560 {
561 	dwarf_frame_free_regs(frame);
562 	mempool_free(frame, dwarf_frame_pool);
563 }
564 
565 extern void ret_from_irq(void);
566 
567 /**
568  *	dwarf_unwind_stack - unwind the stack
569  *
570  *	@pc: address of the function to unwind
571  *	@prev: struct dwarf_frame of the previous stackframe on the callstack
572  *
573  *	Return a struct dwarf_frame representing the most recent frame
574  *	on the callstack. Each of the lower (older) stack frames are
575  *	linked via the "prev" member.
576  */
dwarf_unwind_stack(unsigned long pc,struct dwarf_frame * prev)577 struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
578 				       struct dwarf_frame *prev)
579 {
580 	struct dwarf_frame *frame;
581 	struct dwarf_cie *cie;
582 	struct dwarf_fde *fde;
583 	struct dwarf_reg *reg;
584 	unsigned long addr;
585 
586 	/*
587 	 * If we've been called in to before initialization has
588 	 * completed, bail out immediately.
589 	 */
590 	if (!dwarf_unwinder_ready)
591 		return NULL;
592 
593 	/*
594 	 * If we're starting at the top of the stack we need get the
595 	 * contents of a physical register to get the CFA in order to
596 	 * begin the virtual unwinding of the stack.
597 	 *
598 	 * NOTE: the return address is guaranteed to be setup by the
599 	 * time this function makes its first function call.
600 	 */
601 	if (!pc || !prev)
602 		pc = (unsigned long)current_text_addr();
603 
604 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
605 	/*
606 	 * If our stack has been patched by the function graph tracer
607 	 * then we might see the address of return_to_handler() where we
608 	 * expected to find the real return address.
609 	 */
610 	if (pc == (unsigned long)&return_to_handler) {
611 		int index = current->curr_ret_stack;
612 
613 		/*
614 		 * We currently have no way of tracking how many
615 		 * return_to_handler()'s we've seen. If there is more
616 		 * than one patched return address on our stack,
617 		 * complain loudly.
618 		 */
619 		WARN_ON(index > 0);
620 
621 		pc = current->ret_stack[index].ret;
622 	}
623 #endif
624 
625 	frame = mempool_alloc(dwarf_frame_pool, GFP_ATOMIC);
626 	if (!frame) {
627 		printk(KERN_ERR "Unable to allocate a dwarf frame\n");
628 		UNWINDER_BUG();
629 	}
630 
631 	INIT_LIST_HEAD(&frame->reg_list);
632 	frame->flags = 0;
633 	frame->prev = prev;
634 	frame->return_addr = 0;
635 
636 	fde = dwarf_lookup_fde(pc);
637 	if (!fde) {
638 		/*
639 		 * This is our normal exit path. There are two reasons
640 		 * why we might exit here,
641 		 *
642 		 *	a) pc has no asscociated DWARF frame info and so
643 		 *	we don't know how to unwind this frame. This is
644 		 *	usually the case when we're trying to unwind a
645 		 *	frame that was called from some assembly code
646 		 *	that has no DWARF info, e.g. syscalls.
647 		 *
648 		 *	b) the DEBUG info for pc is bogus. There's
649 		 *	really no way to distinguish this case from the
650 		 *	case above, which sucks because we could print a
651 		 *	warning here.
652 		 */
653 		goto bail;
654 	}
655 
656 	cie = dwarf_lookup_cie(fde->cie_pointer);
657 
658 	frame->pc = fde->initial_location;
659 
660 	/* CIE initial instructions */
661 	dwarf_cfa_execute_insns(cie->initial_instructions,
662 				cie->instructions_end, cie, fde,
663 				frame, pc);
664 
665 	/* FDE instructions */
666 	dwarf_cfa_execute_insns(fde->instructions, fde->end, cie,
667 				fde, frame, pc);
668 
669 	/* Calculate the CFA */
670 	switch (frame->flags) {
671 	case DWARF_FRAME_CFA_REG_OFFSET:
672 		if (prev) {
673 			reg = dwarf_frame_reg(prev, frame->cfa_register);
674 			UNWINDER_BUG_ON(!reg);
675 			UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
676 
677 			addr = prev->cfa + reg->addr;
678 			frame->cfa = __raw_readl(addr);
679 
680 		} else {
681 			/*
682 			 * Again, we're starting from the top of the
683 			 * stack. We need to physically read
684 			 * the contents of a register in order to get
685 			 * the Canonical Frame Address for this
686 			 * function.
687 			 */
688 			frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
689 		}
690 
691 		frame->cfa += frame->cfa_offset;
692 		break;
693 	default:
694 		UNWINDER_BUG();
695 	}
696 
697 	reg = dwarf_frame_reg(frame, DWARF_ARCH_RA_REG);
698 
699 	/*
700 	 * If we haven't seen the return address register or the return
701 	 * address column is undefined then we must assume that this is
702 	 * the end of the callstack.
703 	 */
704 	if (!reg || reg->flags == DWARF_UNDEFINED)
705 		goto bail;
706 
707 	UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
708 
709 	addr = frame->cfa + reg->addr;
710 	frame->return_addr = __raw_readl(addr);
711 
712 	/*
713 	 * Ah, the joys of unwinding through interrupts.
714 	 *
715 	 * Interrupts are tricky - the DWARF info needs to be _really_
716 	 * accurate and unfortunately I'm seeing a lot of bogus DWARF
717 	 * info. For example, I've seen interrupts occur in epilogues
718 	 * just after the frame pointer (r14) had been restored. The
719 	 * problem was that the DWARF info claimed that the CFA could be
720 	 * reached by using the value of the frame pointer before it was
721 	 * restored.
722 	 *
723 	 * So until the compiler can be trusted to produce reliable
724 	 * DWARF info when it really matters, let's stop unwinding once
725 	 * we've calculated the function that was interrupted.
726 	 */
727 	if (prev && prev->pc == (unsigned long)ret_from_irq)
728 		frame->return_addr = 0;
729 
730 	return frame;
731 
732 bail:
733 	dwarf_free_frame(frame);
734 	return NULL;
735 }
736 
dwarf_parse_cie(void * entry,void * p,unsigned long len,unsigned char * end,struct module * mod)737 static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
738 			   unsigned char *end, struct module *mod)
739 {
740 	struct rb_node **rb_node = &cie_root.rb_node;
741 	struct rb_node *parent = *rb_node;
742 	struct dwarf_cie *cie;
743 	unsigned long flags;
744 	int count;
745 
746 	cie = kzalloc(sizeof(*cie), GFP_KERNEL);
747 	if (!cie)
748 		return -ENOMEM;
749 
750 	cie->length = len;
751 
752 	/*
753 	 * Record the offset into the .eh_frame section
754 	 * for this CIE. It allows this CIE to be
755 	 * quickly and easily looked up from the
756 	 * corresponding FDE.
757 	 */
758 	cie->cie_pointer = (unsigned long)entry;
759 
760 	cie->version = *(char *)p++;
761 	UNWINDER_BUG_ON(cie->version != 1);
762 
763 	cie->augmentation = p;
764 	p += strlen(cie->augmentation) + 1;
765 
766 	count = dwarf_read_uleb128(p, &cie->code_alignment_factor);
767 	p += count;
768 
769 	count = dwarf_read_leb128(p, &cie->data_alignment_factor);
770 	p += count;
771 
772 	/*
773 	 * Which column in the rule table contains the
774 	 * return address?
775 	 */
776 	if (cie->version == 1) {
777 		cie->return_address_reg = __raw_readb(p);
778 		p++;
779 	} else {
780 		count = dwarf_read_uleb128(p, &cie->return_address_reg);
781 		p += count;
782 	}
783 
784 	if (cie->augmentation[0] == 'z') {
785 		unsigned int length, count;
786 		cie->flags |= DWARF_CIE_Z_AUGMENTATION;
787 
788 		count = dwarf_read_uleb128(p, &length);
789 		p += count;
790 
791 		UNWINDER_BUG_ON((unsigned char *)p > end);
792 
793 		cie->initial_instructions = p + length;
794 		cie->augmentation++;
795 	}
796 
797 	while (*cie->augmentation) {
798 		/*
799 		 * "L" indicates a byte showing how the
800 		 * LSDA pointer is encoded. Skip it.
801 		 */
802 		if (*cie->augmentation == 'L') {
803 			p++;
804 			cie->augmentation++;
805 		} else if (*cie->augmentation == 'R') {
806 			/*
807 			 * "R" indicates a byte showing
808 			 * how FDE addresses are
809 			 * encoded.
810 			 */
811 			cie->encoding = *(char *)p++;
812 			cie->augmentation++;
813 		} else if (*cie->augmentation == 'P') {
814 			/*
815 			 * "R" indicates a personality
816 			 * routine in the CIE
817 			 * augmentation.
818 			 */
819 			UNWINDER_BUG();
820 		} else if (*cie->augmentation == 'S') {
821 			UNWINDER_BUG();
822 		} else {
823 			/*
824 			 * Unknown augmentation. Assume
825 			 * 'z' augmentation.
826 			 */
827 			p = cie->initial_instructions;
828 			UNWINDER_BUG_ON(!p);
829 			break;
830 		}
831 	}
832 
833 	cie->initial_instructions = p;
834 	cie->instructions_end = end;
835 
836 	/* Add to list */
837 	spin_lock_irqsave(&dwarf_cie_lock, flags);
838 
839 	while (*rb_node) {
840 		struct dwarf_cie *cie_tmp;
841 
842 		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
843 
844 		parent = *rb_node;
845 
846 		if (cie->cie_pointer < cie_tmp->cie_pointer)
847 			rb_node = &parent->rb_left;
848 		else if (cie->cie_pointer >= cie_tmp->cie_pointer)
849 			rb_node = &parent->rb_right;
850 		else
851 			WARN_ON(1);
852 	}
853 
854 	rb_link_node(&cie->node, parent, rb_node);
855 	rb_insert_color(&cie->node, &cie_root);
856 
857 #ifdef CONFIG_MODULES
858 	if (mod != NULL)
859 		list_add_tail(&cie->link, &mod->arch.cie_list);
860 #endif
861 
862 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
863 
864 	return 0;
865 }
866 
/*
 * dwarf_parse_fde - parse one FDE and add it to the FDE tree
 * @entry: address of the start of the FDE (its length field)
 * @entry_type: value of the FDE's CIE pointer field
 * @start: address of the first byte after the CIE pointer field
 * @len: length of the entry as read from its length field
 * @end: address just past the end of the entry
 * @mod: owning module, or NULL
 *
 * Resolve the FDE's CIE, decode the address range the FDE covers,
 * record where its call frame instructions start and end, and insert
 * it into fde_root ordered by address range. With CONFIG_MODULES and
 * a non-NULL @mod the FDE is also linked onto the module's FDE list.
 *
 * Return 0 on success (including when an FDE overlapping an existing
 * one is dropped with a warning), -ENOMEM if the FDE could not be
 * allocated, or -EINVAL if the FDE refers to an unknown CIE.
 */
static int dwarf_parse_fde(void *entry, u32 entry_type,
			   void *start, unsigned long len,
			   unsigned char *end, struct module *mod)
{
	struct rb_node **rb_node = &fde_root.rb_node;
	struct rb_node *parent = *rb_node;
	struct dwarf_fde *fde;
	struct dwarf_cie *cie;
	unsigned long fde_start, fde_end;
	unsigned long flags;
	int count;
	void *p = start;

	fde = kzalloc(sizeof(*fde), GFP_KERNEL);
	if (!fde)
		return -ENOMEM;

	fde->length = len;

	/*
	 * In a .eh_frame section the CIE pointer is the distance from
	 * the CIE pointer field itself back to the start of the CIE.
	 * The field sits in the 4 bytes just before @start.
	 */
	fde->cie_pointer = (unsigned long)(p - entry_type - 4);

	cie = dwarf_lookup_cie(fde->cie_pointer);
	if (!cie) {
		/* Nothing below can be decoded without the CIE. */
		kfree(fde);
		return -EINVAL;
	}
	fde->cie = cie;

	if (cie->encoding)
		count = dwarf_read_encoded_value(p, &fde->initial_location,
						 cie->encoding);
	else
		count = dwarf_read_addr(p, &fde->initial_location);

	p += count;

	/* The range is a plain size, so drop the "relative to" bits. */
	if (cie->encoding)
		count = dwarf_read_encoded_value(p, &fde->address_range,
						 cie->encoding & 0x0f);
	else
		count = dwarf_read_addr(p, &fde->address_range);

	p += count;

	if (fde->cie->flags & DWARF_CIE_Z_AUGMENTATION) {
		unsigned int length;
		count = dwarf_read_uleb128(p, &length);
		p += count + length;
	}

	/* Call frame instructions. */
	fde->instructions = p;
	fde->end = end;

	fde_start = fde->initial_location;
	fde_end = fde->initial_location + fde->address_range;

	/* Add to list. */
	spin_lock_irqsave(&dwarf_fde_lock, flags);

	while (*rb_node) {
		struct dwarf_fde *fde_tmp;
		unsigned long tmp_start, tmp_end;

		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);

		tmp_start = fde_tmp->initial_location;
		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;

		parent = *rb_node;

		if (fde_start < tmp_start) {
			rb_node = &parent->rb_left;
		} else if (fde_start >= tmp_end) {
			rb_node = &parent->rb_right;
		} else {
			/*
			 * This FDE starts inside a range that is already
			 * covered. Neither child is the right place for it
			 * and staying on this node would spin forever with
			 * the lock held, so warn and drop the newcomer.
			 */
			WARN_ON(1);
			spin_unlock_irqrestore(&dwarf_fde_lock, flags);
			kfree(fde);
			return 0;
		}
	}

	rb_link_node(&fde->node, parent, rb_node);
	rb_insert_color(&fde->node, &fde_root);

#ifdef CONFIG_MODULES
	if (mod != NULL)
		list_add_tail(&fde->link, &mod->arch.fde_list);
#endif

	spin_unlock_irqrestore(&dwarf_fde_lock, flags);

	return 0;
}
958 
dwarf_unwinder_dump(struct task_struct * task,struct pt_regs * regs,unsigned long * sp,const struct stacktrace_ops * ops,void * data)959 static void dwarf_unwinder_dump(struct task_struct *task,
960 				struct pt_regs *regs,
961 				unsigned long *sp,
962 				const struct stacktrace_ops *ops,
963 				void *data)
964 {
965 	struct dwarf_frame *frame, *_frame;
966 	unsigned long return_addr;
967 
968 	_frame = NULL;
969 	return_addr = 0;
970 
971 	while (1) {
972 		frame = dwarf_unwind_stack(return_addr, _frame);
973 
974 		if (_frame)
975 			dwarf_free_frame(_frame);
976 
977 		_frame = frame;
978 
979 		if (!frame || !frame->return_addr)
980 			break;
981 
982 		return_addr = frame->return_addr;
983 		ops->address(data, return_addr, 1);
984 	}
985 
986 	if (frame)
987 		dwarf_free_frame(frame);
988 }
989 
990 static struct unwinder dwarf_unwinder = {
991 	.name = "dwarf-unwinder",
992 	.dump = dwarf_unwinder_dump,
993 	.rating = 150,
994 };
995 
dwarf_unwinder_cleanup(void)996 static void dwarf_unwinder_cleanup(void)
997 {
998 	struct rb_node **fde_rb_node = &fde_root.rb_node;
999 	struct rb_node **cie_rb_node = &cie_root.rb_node;
1000 
1001 	/*
1002 	 * Deallocate all the memory allocated for the DWARF unwinder.
1003 	 * Traverse all the FDE/CIE lists and remove and free all the
1004 	 * memory associated with those data structures.
1005 	 */
1006 	while (*fde_rb_node) {
1007 		struct dwarf_fde *fde;
1008 
1009 		fde = rb_entry(*fde_rb_node, struct dwarf_fde, node);
1010 		rb_erase(*fde_rb_node, &fde_root);
1011 		kfree(fde);
1012 	}
1013 
1014 	while (*cie_rb_node) {
1015 		struct dwarf_cie *cie;
1016 
1017 		cie = rb_entry(*cie_rb_node, struct dwarf_cie, node);
1018 		rb_erase(*cie_rb_node, &cie_root);
1019 		kfree(cie);
1020 	}
1021 
1022 	kmem_cache_destroy(dwarf_reg_cachep);
1023 	kmem_cache_destroy(dwarf_frame_cachep);
1024 }
1025 
1026 /**
1027  *	dwarf_parse_section - parse DWARF section
1028  *	@eh_frame_start: start address of the .eh_frame section
1029  *	@eh_frame_end: end address of the .eh_frame section
1030  *	@mod: the kernel module containing the .eh_frame section
1031  *
1032  *	Parse the information in a .eh_frame section.
1033  */
dwarf_parse_section(char * eh_frame_start,char * eh_frame_end,struct module * mod)1034 static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end,
1035 			       struct module *mod)
1036 {
1037 	u32 entry_type;
1038 	void *p, *entry;
1039 	int count, err = 0;
1040 	unsigned long len = 0;
1041 	unsigned int c_entries, f_entries;
1042 	unsigned char *end;
1043 
1044 	c_entries = 0;
1045 	f_entries = 0;
1046 	entry = eh_frame_start;
1047 
1048 	while ((char *)entry < eh_frame_end) {
1049 		p = entry;
1050 
1051 		count = dwarf_entry_len(p, &len);
1052 		if (count == 0) {
1053 			/*
1054 			 * We read a bogus length field value. There is
1055 			 * nothing we can do here apart from disabling
1056 			 * the DWARF unwinder. We can't even skip this
1057 			 * entry and move to the next one because 'len'
1058 			 * tells us where our next entry is.
1059 			 */
1060 			err = -EINVAL;
1061 			goto out;
1062 		} else
1063 			p += count;
1064 
1065 		/* initial length does not include itself */
1066 		end = p + len;
1067 
1068 		entry_type = get_unaligned((u32 *)p);
1069 		p += 4;
1070 
1071 		if (entry_type == DW_EH_FRAME_CIE) {
1072 			err = dwarf_parse_cie(entry, p, len, end, mod);
1073 			if (err < 0)
1074 				goto out;
1075 			else
1076 				c_entries++;
1077 		} else {
1078 			err = dwarf_parse_fde(entry, entry_type, p, len,
1079 					      end, mod);
1080 			if (err < 0)
1081 				goto out;
1082 			else
1083 				f_entries++;
1084 		}
1085 
1086 		entry = (char *)entry + len + 4;
1087 	}
1088 
1089 	printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
1090 	       c_entries, f_entries);
1091 
1092 	return 0;
1093 
1094 out:
1095 	return err;
1096 }
1097 
1098 #ifdef CONFIG_MODULES
module_dwarf_finalize(const Elf_Ehdr * hdr,const Elf_Shdr * sechdrs,struct module * me)1099 int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
1100 			  struct module *me)
1101 {
1102 	unsigned int i, err;
1103 	unsigned long start, end;
1104 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
1105 
1106 	start = end = 0;
1107 
1108 	for (i = 1; i < hdr->e_shnum; i++) {
1109 		/* Alloc bit cleared means "ignore it." */
1110 		if ((sechdrs[i].sh_flags & SHF_ALLOC)
1111 		    && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) {
1112 			start = sechdrs[i].sh_addr;
1113 			end = start + sechdrs[i].sh_size;
1114 			break;
1115 		}
1116 	}
1117 
1118 	/* Did we find the .eh_frame section? */
1119 	if (i != hdr->e_shnum) {
1120 		INIT_LIST_HEAD(&me->arch.cie_list);
1121 		INIT_LIST_HEAD(&me->arch.fde_list);
1122 		err = dwarf_parse_section((char *)start, (char *)end, me);
1123 		if (err) {
1124 			printk(KERN_WARNING "%s: failed to parse DWARF info\n",
1125 			       me->name);
1126 			return err;
1127 		}
1128 	}
1129 
1130 	return 0;
1131 }
1132 
1133 /**
1134  *	module_dwarf_cleanup - remove FDE/CIEs associated with @mod
1135  *	@mod: the module that is being unloaded
1136  *
1137  *	Remove any FDEs and CIEs from the global lists that came from
1138  *	@mod's .eh_frame section because @mod is being unloaded.
1139  */
module_dwarf_cleanup(struct module * mod)1140 void module_dwarf_cleanup(struct module *mod)
1141 {
1142 	struct dwarf_fde *fde, *ftmp;
1143 	struct dwarf_cie *cie, *ctmp;
1144 	unsigned long flags;
1145 
1146 	spin_lock_irqsave(&dwarf_cie_lock, flags);
1147 
1148 	list_for_each_entry_safe(cie, ctmp, &mod->arch.cie_list, link) {
1149 		list_del(&cie->link);
1150 		rb_erase(&cie->node, &cie_root);
1151 		kfree(cie);
1152 	}
1153 
1154 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
1155 
1156 	spin_lock_irqsave(&dwarf_fde_lock, flags);
1157 
1158 	list_for_each_entry_safe(fde, ftmp, &mod->arch.fde_list, link) {
1159 		list_del(&fde->link);
1160 		rb_erase(&fde->node, &fde_root);
1161 		kfree(fde);
1162 	}
1163 
1164 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
1165 }
1166 #endif /* CONFIG_MODULES */
1167 
1168 /**
1169  *	dwarf_unwinder_init - initialise the dwarf unwinder
1170  *
1171  *	Build the data structures describing the .dwarf_frame section to
1172  *	make it easier to lookup CIE and FDE entries. Because the
1173  *	.eh_frame section is packed as tightly as possible it is not
1174  *	easy to lookup the FDE for a given PC, so we build a list of FDE
1175  *	and CIE entries that make it easier.
1176  */
dwarf_unwinder_init(void)1177 static int __init dwarf_unwinder_init(void)
1178 {
1179 	int err = -ENOMEM;
1180 
1181 	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
1182 			sizeof(struct dwarf_frame), 0,
1183 			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
1184 
1185 	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
1186 			sizeof(struct dwarf_reg), 0,
1187 			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
1188 
1189 	dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ,
1190 					  mempool_alloc_slab,
1191 					  mempool_free_slab,
1192 					  dwarf_frame_cachep);
1193 	if (!dwarf_frame_pool)
1194 		goto out;
1195 
1196 	dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ,
1197 					 mempool_alloc_slab,
1198 					 mempool_free_slab,
1199 					 dwarf_reg_cachep);
1200 	if (!dwarf_reg_pool)
1201 		goto out;
1202 
1203 	err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL);
1204 	if (err)
1205 		goto out;
1206 
1207 	err = unwinder_register(&dwarf_unwinder);
1208 	if (err)
1209 		goto out;
1210 
1211 	dwarf_unwinder_ready = 1;
1212 
1213 	return 0;
1214 
1215 out:
1216 	printk(KERN_ERR "Failed to initialise DWARF unwinder: %d\n", err);
1217 	dwarf_unwinder_cleanup();
1218 	return err;
1219 }
1220 early_initcall(dwarf_unwinder_init);
1221