#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>

#include <asm/perf_event.h>
#include <asm/insn.h>

#include "perf_event.h"

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE	PAGE_SIZE

/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bx, cx, dx;
	u32 si, di, bp, sp;
};

 */

struct pebs_record_core {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
};

struct pebs_record_nhm {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
};

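/*
 * Point this CPU's MSR_IA32_DS_AREA at its per-cpu debug store so the
 * hardware knows where to write BTS/PEBS records.
 */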
void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}

void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

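/*
 * Allocate the per-cpu PEBS buffer and fill in the DS bookkeeping
 * fields. The interrupt threshold is left at a single record, so a PMI
 * is raised after every PEBS write.
 */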
static int alloc_pebs_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	int node = cpu_to_node(cpu);
	int max, thresh = 1; /* always use a single PEBS record */
	void *buffer;

	if (!x86_pmu.pebs)
		return 0;

	buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
	if (unlikely(!buffer))
		return -ENOMEM;

	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;

	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
	ds->pebs_index = ds->pebs_buffer_base;
	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
		max * x86_pmu.pebs_record_size;

	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
		thresh * x86_pmu.pebs_record_size;

	return 0;
}

static void release_pebs_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds || !x86_pmu.pebs)
		return;

	kfree((void *)(unsigned long)ds->pebs_buffer_base);
	ds->pebs_buffer_base = 0;
}

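/*
 * Allocate the per-cpu BTS buffer. The interrupt threshold is placed
 * BTS_BUFFER_SIZE/16 short of the end, so the threshold interrupt
 * fires while 1/16th of the buffer is still free.
 */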
static int alloc_bts_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	int node = cpu_to_node(cpu);
	int max, thresh;
	void *buffer;

	if (!x86_pmu.bts)
		return 0;

	buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
	if (unlikely(!buffer))
		return -ENOMEM;

	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
	thresh = max / 16;

	ds->bts_buffer_base = (u64)(unsigned long)buffer;
	ds->bts_index = ds->bts_buffer_base;
	ds->bts_absolute_maximum = ds->bts_buffer_base +
		max * BTS_RECORD_SIZE;
	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
		thresh * BTS_RECORD_SIZE;

	return 0;
}

static void release_bts_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds || !x86_pmu.bts)
		return;

	kfree((void *)(unsigned long)ds->bts_buffer_base);
	ds->bts_buffer_base = 0;
}

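/*
 * Allocate the debug_store descriptor itself and hook it up to the
 * per-cpu cpu_hw_events; the BTS and PEBS buffers hang off of it.
 */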
static int alloc_ds_buffer(int cpu)
{
	int node = cpu_to_node(cpu);
	struct debug_store *ds;

	ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
	if (unlikely(!ds))
		return -ENOMEM;

	per_cpu(cpu_hw_events, cpu).ds = ds;

	return 0;
}

static void release_ds_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	per_cpu(cpu_hw_events, cpu).ds = NULL;
	kfree(ds);
}

void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	get_online_cpus();
	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		release_pebs_buffer(cpu);
		release_bts_buffer(cpu);
		release_ds_buffer(cpu);
	}
	put_online_cpus();
}

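/*
 * Allocate DS, BTS and PEBS buffers for every possible CPU. BTS and
 * PEBS are handled independently: if either allocation fails on any
 * CPU, that facility is torn down everywhere and left inactive, while
 * the other may still be enabled.
 */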
void reserve_ds_buffers(void)
{
	int bts_err = 0, pebs_err = 0;
	int cpu;

	x86_pmu.bts_active = 0;
	x86_pmu.pebs_active = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	if (!x86_pmu.bts)
		bts_err = 1;

	if (!x86_pmu.pebs)
		pebs_err = 1;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		if (alloc_ds_buffer(cpu)) {
			bts_err = 1;
			pebs_err = 1;
		}

		if (!bts_err && alloc_bts_buffer(cpu))
			bts_err = 1;

		if (!pebs_err && alloc_pebs_buffer(cpu))
			pebs_err = 1;

		if (bts_err && pebs_err)
			break;
	}

	if (bts_err) {
		for_each_possible_cpu(cpu)
			release_bts_buffer(cpu);
	}

	if (pebs_err) {
		for_each_possible_cpu(cpu)
			release_pebs_buffer(cpu);
	}

	if (bts_err && pebs_err) {
		for_each_possible_cpu(cpu)
			release_ds_buffer(cpu);
	} else {
		if (x86_pmu.bts && !bts_err)
			x86_pmu.bts_active = 1;

		if (x86_pmu.pebs && !pebs_err)
			x86_pmu.pebs_active = 1;

		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();
}

/*
 * BTS
 */

struct event_constraint bts_constraint =
	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);

void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= DEBUGCTLMSR_TR;
	debugctlmsr |= DEBUGCTLMSR_BTS;
	debugctlmsr |= DEBUGCTLMSR_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}

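/*
 * Drain the BTS buffer: emit one perf sample per branch record (the
 * branch source as the sample IP, the branch target as the sample
 * address) and reset the buffer index. Returns non-zero if there was
 * anything to drain.
 */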
int intel_pmu_drain_bts_buffer(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	struct pt_regs regs;

	if (!event)
		return 0;

	if (!x86_pmu.bts_active)
		return 0;

	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= at)
		return 0;

	ds->bts_index = ds->bts_buffer_base;

	perf_sample_data_init(&data, 0);
	data.period = event->hw.last_period;
	regs.ip     = 0;

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	perf_prepare_sample(&header, &data, event, &regs);

	if (perf_output_begin(&handle, event, header.size * (top - at)))
		return 1;

	for (; at < top; at++) {
		data.ip		= at->from;
		data.addr	= at->to;

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
	return 1;
}

/*
 * PEBS
 */
struct event_constraint intel_core2_pebs_event_constraints[] = {
	INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
	INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	INTEL_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_atom_pebs_event_constraints[] = {
	INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_nehalem_pebs_event_constraints[] = {
	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
	INTEL_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
	INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
	EVENT_CONSTRAINT_END
};

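/*
 * Look up the PEBS constraint matching this event's config. Returns
 * NULL when the event is not a precise (PEBS) event, the matching
 * constraint when one exists, and the empty constraint otherwise.
 */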
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
	struct event_constraint *c;

	if (!event->attr.precise_ip)
		return NULL;

	if (x86_pmu.pebs_constraints) {
		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &emptyconstraint;
}

void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

	cpuc->pebs_enabled |= 1ULL << hwc->idx;
}

void intel_pmu_pebs_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
	if (cpuc->enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}

void intel_pmu_pebs_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

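/*
 * A trap-like PEBS assist records the address of the instruction
 * *after* the one that caused the event. Use the last LBR entry to
 * find the start of the sampled basic block, then decode forward one
 * instruction at a time until we reach the recorded IP; the preceding
 * instruction is the one that triggered the event. Returns 1 if
 * regs->ip was fixed up (the sample is exact), 0 otherwise.
 */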
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;
	int is_64bit = 0;

	/*
	 * We don't need to fixup if the PEBS assist is fault like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PAGE_SIZE)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		regs->ip = from;
		return 1;
	}

	do {
		struct insn insn;
		u8 buf[MAX_INSN_SIZE];
		void *kaddr;

		old_to = to;
		if (!kernel_ip(ip)) {
			int bytes, size = MAX_INSN_SIZE;

			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
			if (bytes != size)
				return 0;

			kaddr = buf;
		} else
			kaddr = (void *)to;

#ifdef CONFIG_X86_64
		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
		insn_init(&insn, kaddr, is_64bit);
		insn_get_length(&insn);
		to += insn.length;
	} while (to < ip);

	if (to == ip) {
		regs->ip = old_to;
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}

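/*
 * Feed one PEBS record to the generic overflow handler. For
 * precise_ip > 1 the LBR fixup above is used to point the sample at
 * the instruction that caused the event, and PERF_EFLAGS_EXACT is set
 * when that succeeds.
 */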
static void __intel_pmu_pebs_event(struct perf_event *event,
				   struct pt_regs *iregs, void *__pebs)
{
	/*
	 * We cast to pebs_record_core since that is a subset of
	 * both formats and we don't use the other fields in this
	 * routine.
	 */
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct pebs_record_core *pebs = __pebs;
	struct perf_sample_data data;
	struct pt_regs regs;

	if (!intel_pmu_save_and_restart(event))
		return;

	perf_sample_data_init(&data, 0);
	data.period = event->hw.last_period;

	/*
	 * We use the interrupt regs as a base because the PEBS record
	 * does not contain a full regs set, specifically it seems to
	 * lack segment descriptors, which get used by things like
	 * user_mode().
	 *
	 * In the simple case fix up only the IP and BP,SP regs, for
	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
	 */
	regs = *iregs;
	regs.ip = pebs->ip;
	regs.bp = pebs->bp;
	regs.sp = pebs->sp;

	if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
		regs.flags |= PERF_EFLAGS_EXACT;
	else
		regs.flags &= ~PERF_EFLAGS_EXACT;

	if (has_branch_stack(event))
		data.br_stack = &cpuc->lbr_stack;

	if (perf_event_overflow(event, &data, &regs))
		x86_pmu_stop(event, 0);
}

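/*
 * Core2/Atom PEBS: only PMC0 can do PEBS, so at most a single record
 * is expected; drain the buffer and hand the last record to
 * __intel_pmu_pebs_event().
 */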
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
	struct pebs_record_core *at, *top;
	int n;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

	/*
	 * Whatever else happens, drain the thing
	 */
	ds->pebs_index = ds->pebs_buffer_base;

	if (!test_bit(0, cpuc->active_mask))
		return;

	WARN_ON_ONCE(!event);

	if (!event->attr.precise_ip)
		return;

	n = top - at;
	if (n <= 0)
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ON_ONCE(n > 1);
	at += n - 1;

	__intel_pmu_pebs_event(event, iregs, at);
}

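/*
 * Nehalem and later: several counters can do PEBS at once, and each
 * record carries a status bitmask identifying the counter(s) that
 * overflowed. Walk the records and attribute each one to the first
 * active precise event whose bit is set and not yet claimed.
 */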
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct pebs_record_nhm *at, *top;
	struct perf_event *event = NULL;
	u64 status = 0;
	int bit, n;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	ds->pebs_index = ds->pebs_buffer_base;

	n = top - at;
	if (n <= 0)
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ON_ONCE(n > MAX_PEBS_EVENTS);

	for ( ; at < top; at++) {
		for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
			event = cpuc->events[bit];
			if (!test_bit(bit, cpuc->active_mask))
				continue;

			WARN_ON_ONCE(!event);

			if (!event->attr.precise_ip)
				continue;

			if (__test_and_set_bit(bit, (unsigned long *)&status))
				continue;

			break;
		}

		if (!event || bit >= MAX_PEBS_EVENTS)
			continue;

		__intel_pmu_pebs_event(event, iregs, at);
	}
}

/*
 * BTS, PEBS probe and setup
 */

void intel_ds_init(void)
{
	/*
	 * No support for 32bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
	if (x86_pmu.pebs) {
		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
		int format = x86_pmu.intel_cap.pebs_format;

		switch (format) {
		case 0:
			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
			break;

		case 1:
			printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		default:
			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.pebs = 0;
		}
	}
}

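/*
 * Re-program MSR_IA32_DS_AREA with this CPU's debug store pointer,
 * e.g. after the MSR contents have been lost across suspend/resume.
 */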
void perf_restore_debug_store(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
}