1 /*
2  * ARMv6 Performance counter handling code.
3  *
4  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
5  *
6  * ARMv6 has 2 configurable performance counters and a single cycle counter.
7  * They all share a single reset bit but can be written to zero so we can use
8  * that for a reset.
9  *
10  * The counters can't be individually enabled or disabled so when we remove
11  * one event and replace it with another we could get spurious counts from the
12  * wrong event. However, we can take advantage of the fact that the
13  * performance counters can export events to the event bus, and the event bus
14  * itself can be monitored. This requires that we *don't* export the events to
15  * the event bus. The procedure for disabling a configurable counter is:
16  *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
17  *	  effectively stops the counter from counting.
 *	- disable the counter's interrupt generation (each counter has its
 *	  own interrupt enable bit).
20  * Once stopped, the counter value can be written as 0 to reset.
21  *
22  * To enable a counter:
23  *	- enable the counter's interrupt generation.
24  *	- set the new event type.
25  *
26  * Note: the dedicated cycle counter only counts cycles and can't be
27  * enabled/disabled independently of the others. When we want to disable the
28  * cycle counter, we have to just disable the interrupt reporting and start
29  * ignoring that counter. When re-enabling, we have to reset the value and
30  * enable the interrupt.
31  */
32 
33 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
34 enum armv6_perf_types {
35 	ARMV6_PERFCTR_ICACHE_MISS	    = 0x0,
36 	ARMV6_PERFCTR_IBUF_STALL	    = 0x1,
37 	ARMV6_PERFCTR_DDEP_STALL	    = 0x2,
38 	ARMV6_PERFCTR_ITLB_MISS		    = 0x3,
39 	ARMV6_PERFCTR_DTLB_MISS		    = 0x4,
40 	ARMV6_PERFCTR_BR_EXEC		    = 0x5,
41 	ARMV6_PERFCTR_BR_MISPREDICT	    = 0x6,
42 	ARMV6_PERFCTR_INSTR_EXEC	    = 0x7,
43 	ARMV6_PERFCTR_DCACHE_HIT	    = 0x9,
44 	ARMV6_PERFCTR_DCACHE_ACCESS	    = 0xA,
45 	ARMV6_PERFCTR_DCACHE_MISS	    = 0xB,
46 	ARMV6_PERFCTR_DCACHE_WBACK	    = 0xC,
47 	ARMV6_PERFCTR_SW_PC_CHANGE	    = 0xD,
48 	ARMV6_PERFCTR_MAIN_TLB_MISS	    = 0xF,
49 	ARMV6_PERFCTR_EXPL_D_ACCESS	    = 0x10,
50 	ARMV6_PERFCTR_LSU_FULL_STALL	    = 0x11,
51 	ARMV6_PERFCTR_WBUF_DRAINED	    = 0x12,
52 	ARMV6_PERFCTR_CPU_CYCLES	    = 0xFF,
53 	ARMV6_PERFCTR_NOP		    = 0x20,
54 };
55 
56 enum armv6_counters {
57 	ARMV6_CYCLE_COUNTER = 0,
58 	ARMV6_COUNTER0,
59 	ARMV6_COUNTER1,
60 };
61 
62 /*
63  * The hardware events that we support. We do support cache operations but
64  * we have harvard caches and no way to combine instruction and data
65  * accesses/misses in hardware.
66  */
67 static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
68 	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV6_PERFCTR_CPU_CYCLES,
69 	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV6_PERFCTR_INSTR_EXEC,
70 	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
71 	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
72 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV6_PERFCTR_BR_EXEC,
73 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV6_PERFCTR_BR_MISPREDICT,
74 	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
75 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV6_PERFCTR_IBUF_STALL,
76 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV6_PERFCTR_LSU_FULL_STALL,
77 };
78 
79 static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
80 					  [PERF_COUNT_HW_CACHE_OP_MAX]
81 					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
82 	[C(L1D)] = {
83 		/*
84 		 * The performance counters don't differentiate between read
85 		 * and write accesses/misses so this isn't strictly correct,
86 		 * but it's the best we can do. Writes and reads get
87 		 * combined.
88 		 */
89 		[C(OP_READ)] = {
90 			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
91 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
92 		},
93 		[C(OP_WRITE)] = {
94 			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
95 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
96 		},
97 		[C(OP_PREFETCH)] = {
98 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
99 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
100 		},
101 	},
102 	[C(L1I)] = {
103 		[C(OP_READ)] = {
104 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
105 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
106 		},
107 		[C(OP_WRITE)] = {
108 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
109 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
110 		},
111 		[C(OP_PREFETCH)] = {
112 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
113 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
114 		},
115 	},
116 	[C(LL)] = {
117 		[C(OP_READ)] = {
118 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
119 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
120 		},
121 		[C(OP_WRITE)] = {
122 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
123 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
124 		},
125 		[C(OP_PREFETCH)] = {
126 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
127 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
128 		},
129 	},
130 	[C(DTLB)] = {
131 		/*
132 		 * The ARM performance counters can count micro DTLB misses,
133 		 * micro ITLB misses and main TLB misses. There isn't an event
134 		 * for TLB misses, so use the micro misses here and if users
135 		 * want the main TLB misses they can use a raw counter.
136 		 */
137 		[C(OP_READ)] = {
138 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
139 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
140 		},
141 		[C(OP_WRITE)] = {
142 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
143 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
144 		},
145 		[C(OP_PREFETCH)] = {
146 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
147 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
148 		},
149 	},
150 	[C(ITLB)] = {
151 		[C(OP_READ)] = {
152 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
153 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
154 		},
155 		[C(OP_WRITE)] = {
156 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
157 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
158 		},
159 		[C(OP_PREFETCH)] = {
160 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
161 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
162 		},
163 	},
164 	[C(BPU)] = {
165 		[C(OP_READ)] = {
166 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
167 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
168 		},
169 		[C(OP_WRITE)] = {
170 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
171 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
172 		},
173 		[C(OP_PREFETCH)] = {
174 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
175 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
176 		},
177 	},
178 	[C(NODE)] = {
179 		[C(OP_READ)] = {
180 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
181 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
182 		},
183 		[C(OP_WRITE)] = {
184 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
185 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
186 		},
187 		[C(OP_PREFETCH)] = {
188 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
189 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
190 		},
191 	},
192 };
193 
194 enum armv6mpcore_perf_types {
195 	ARMV6MPCORE_PERFCTR_ICACHE_MISS	    = 0x0,
196 	ARMV6MPCORE_PERFCTR_IBUF_STALL	    = 0x1,
197 	ARMV6MPCORE_PERFCTR_DDEP_STALL	    = 0x2,
198 	ARMV6MPCORE_PERFCTR_ITLB_MISS	    = 0x3,
199 	ARMV6MPCORE_PERFCTR_DTLB_MISS	    = 0x4,
200 	ARMV6MPCORE_PERFCTR_BR_EXEC	    = 0x5,
201 	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
202 	ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
203 	ARMV6MPCORE_PERFCTR_INSTR_EXEC	    = 0x8,
204 	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
205 	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
206 	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
207 	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
208 	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
209 	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
210 	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
211 	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
212 	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
213 	ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
214 	ARMV6MPCORE_PERFCTR_CPU_CYCLES	    = 0xFF,
215 };
216 
217 /*
218  * The hardware events that we support. We do support cache operations but
219  * we have harvard caches and no way to combine instruction and data
220  * accesses/misses in hardware.
221  */
222 static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
223 	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV6MPCORE_PERFCTR_CPU_CYCLES,
224 	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV6MPCORE_PERFCTR_INSTR_EXEC,
225 	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
226 	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
227 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV6MPCORE_PERFCTR_BR_EXEC,
228 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
229 	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
230 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV6MPCORE_PERFCTR_IBUF_STALL,
231 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV6MPCORE_PERFCTR_LSU_FULL_STALL,
232 };
233 
234 static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
235 					[PERF_COUNT_HW_CACHE_OP_MAX]
236 					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
237 	[C(L1D)] = {
238 		[C(OP_READ)] = {
239 			[C(RESULT_ACCESS)]  =
240 				ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
241 			[C(RESULT_MISS)]    =
242 				ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
243 		},
244 		[C(OP_WRITE)] = {
245 			[C(RESULT_ACCESS)]  =
246 				ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
247 			[C(RESULT_MISS)]    =
248 				ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
249 		},
250 		[C(OP_PREFETCH)] = {
251 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
252 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
253 		},
254 	},
255 	[C(L1I)] = {
256 		[C(OP_READ)] = {
257 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
258 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
259 		},
260 		[C(OP_WRITE)] = {
261 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
262 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
263 		},
264 		[C(OP_PREFETCH)] = {
265 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
266 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
267 		},
268 	},
269 	[C(LL)] = {
270 		[C(OP_READ)] = {
271 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
272 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
273 		},
274 		[C(OP_WRITE)] = {
275 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
276 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
277 		},
278 		[C(OP_PREFETCH)] = {
279 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
280 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
281 		},
282 	},
283 	[C(DTLB)] = {
284 		/*
285 		 * The ARM performance counters can count micro DTLB misses,
286 		 * micro ITLB misses and main TLB misses. There isn't an event
287 		 * for TLB misses, so use the micro misses here and if users
288 		 * want the main TLB misses they can use a raw counter.
289 		 */
290 		[C(OP_READ)] = {
291 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
292 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
293 		},
294 		[C(OP_WRITE)] = {
295 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
296 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
297 		},
298 		[C(OP_PREFETCH)] = {
299 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
300 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
301 		},
302 	},
303 	[C(ITLB)] = {
304 		[C(OP_READ)] = {
305 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
306 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
307 		},
308 		[C(OP_WRITE)] = {
309 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
310 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
311 		},
312 		[C(OP_PREFETCH)] = {
313 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
314 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
315 		},
316 	},
317 	[C(BPU)] = {
318 		[C(OP_READ)] = {
319 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
320 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
321 		},
322 		[C(OP_WRITE)] = {
323 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
324 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
325 		},
326 		[C(OP_PREFETCH)] = {
327 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
328 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
329 		},
330 	},
331 	[C(NODE)] = {
332 		[C(OP_READ)] = {
333 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
334 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
335 		},
336 		[C(OP_WRITE)] = {
337 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
338 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
339 		},
340 		[C(OP_PREFETCH)] = {
341 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
342 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
343 		},
344 	},
345 };
346 
347 static inline unsigned long
armv6_pmcr_read(void)348 armv6_pmcr_read(void)
349 {
350 	u32 val;
351 	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
352 	return val;
353 }
354 
355 static inline void
armv6_pmcr_write(unsigned long val)356 armv6_pmcr_write(unsigned long val)
357 {
358 	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
359 }
360 
361 #define ARMV6_PMCR_ENABLE		(1 << 0)
362 #define ARMV6_PMCR_CTR01_RESET		(1 << 1)
363 #define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
364 #define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)
365 #define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
366 #define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
367 #define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)
368 #define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
369 #define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
370 #define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
371 #define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
372 #define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
373 #define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
374 #define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
375 
376 #define ARMV6_PMCR_OVERFLOWED_MASK \
377 	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
378 	 ARMV6_PMCR_CCOUNT_OVERFLOW)
379 
380 static inline int
armv6_pmcr_has_overflowed(unsigned long pmcr)381 armv6_pmcr_has_overflowed(unsigned long pmcr)
382 {
383 	return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
384 }
385 
386 static inline int
armv6_pmcr_counter_has_overflowed(unsigned long pmcr,enum armv6_counters counter)387 armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
388 				  enum armv6_counters counter)
389 {
390 	int ret = 0;
391 
392 	if (ARMV6_CYCLE_COUNTER == counter)
393 		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
394 	else if (ARMV6_COUNTER0 == counter)
395 		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
396 	else if (ARMV6_COUNTER1 == counter)
397 		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
398 	else
399 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
400 
401 	return ret;
402 }
403 
404 static inline u32
armv6pmu_read_counter(int counter)405 armv6pmu_read_counter(int counter)
406 {
407 	unsigned long value = 0;
408 
409 	if (ARMV6_CYCLE_COUNTER == counter)
410 		asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
411 	else if (ARMV6_COUNTER0 == counter)
412 		asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
413 	else if (ARMV6_COUNTER1 == counter)
414 		asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
415 	else
416 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
417 
418 	return value;
419 }
420 
421 static inline void
armv6pmu_write_counter(int counter,u32 value)422 armv6pmu_write_counter(int counter,
423 		       u32 value)
424 {
425 	if (ARMV6_CYCLE_COUNTER == counter)
426 		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
427 	else if (ARMV6_COUNTER0 == counter)
428 		asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
429 	else if (ARMV6_COUNTER1 == counter)
430 		asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
431 	else
432 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
433 }
434 
435 static void
armv6pmu_enable_event(struct hw_perf_event * hwc,int idx)436 armv6pmu_enable_event(struct hw_perf_event *hwc,
437 		      int idx)
438 {
439 	unsigned long val, mask, evt, flags;
440 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
441 
442 	if (ARMV6_CYCLE_COUNTER == idx) {
443 		mask	= 0;
444 		evt	= ARMV6_PMCR_CCOUNT_IEN;
445 	} else if (ARMV6_COUNTER0 == idx) {
446 		mask	= ARMV6_PMCR_EVT_COUNT0_MASK;
447 		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
448 			  ARMV6_PMCR_COUNT0_IEN;
449 	} else if (ARMV6_COUNTER1 == idx) {
450 		mask	= ARMV6_PMCR_EVT_COUNT1_MASK;
451 		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
452 			  ARMV6_PMCR_COUNT1_IEN;
453 	} else {
454 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
455 		return;
456 	}
457 
458 	/*
459 	 * Mask out the current event and set the counter to count the event
460 	 * that we're interested in.
461 	 */
462 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
463 	val = armv6_pmcr_read();
464 	val &= ~mask;
465 	val |= evt;
466 	armv6_pmcr_write(val);
467 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
468 }
469 
470 static irqreturn_t
armv6pmu_handle_irq(int irq_num,void * dev)471 armv6pmu_handle_irq(int irq_num,
472 		    void *dev)
473 {
474 	unsigned long pmcr = armv6_pmcr_read();
475 	struct perf_sample_data data;
476 	struct pmu_hw_events *cpuc;
477 	struct pt_regs *regs;
478 	int idx;
479 
480 	if (!armv6_pmcr_has_overflowed(pmcr))
481 		return IRQ_NONE;
482 
483 	regs = get_irq_regs();
484 
485 	/*
486 	 * The interrupts are cleared by writing the overflow flags back to
487 	 * the control register. All of the other bits don't have any effect
488 	 * if they are rewritten, so write the whole value back.
489 	 */
490 	armv6_pmcr_write(pmcr);
491 
492 	perf_sample_data_init(&data, 0);
493 
494 	cpuc = &__get_cpu_var(cpu_hw_events);
495 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
496 		struct perf_event *event = cpuc->events[idx];
497 		struct hw_perf_event *hwc;
498 
499 		/* Ignore if we don't have an event. */
500 		if (!event)
501 			continue;
502 
503 		/*
504 		 * We have a single interrupt for all counters. Check that
505 		 * each counter has overflowed before we process it.
506 		 */
507 		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
508 			continue;
509 
510 		hwc = &event->hw;
511 		armpmu_event_update(event, hwc, idx);
512 		data.period = event->hw.last_period;
513 		if (!armpmu_event_set_period(event, hwc, idx))
514 			continue;
515 
516 		if (perf_event_overflow(event, &data, regs))
517 			cpu_pmu->disable(hwc, idx);
518 	}
519 
520 	/*
521 	 * Handle the pending perf events.
522 	 *
523 	 * Note: this call *must* be run with interrupts disabled. For
524 	 * platforms that can have the PMU interrupts raised as an NMI, this
525 	 * will not work.
526 	 */
527 	irq_work_run();
528 
529 	return IRQ_HANDLED;
530 }
531 
532 static void
armv6pmu_start(void)533 armv6pmu_start(void)
534 {
535 	unsigned long flags, val;
536 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
537 
538 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
539 	val = armv6_pmcr_read();
540 	val |= ARMV6_PMCR_ENABLE;
541 	armv6_pmcr_write(val);
542 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
543 }
544 
545 static void
armv6pmu_stop(void)546 armv6pmu_stop(void)
547 {
548 	unsigned long flags, val;
549 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
550 
551 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
552 	val = armv6_pmcr_read();
553 	val &= ~ARMV6_PMCR_ENABLE;
554 	armv6_pmcr_write(val);
555 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
556 }
557 
558 static int
armv6pmu_get_event_idx(struct pmu_hw_events * cpuc,struct hw_perf_event * event)559 armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
560 		       struct hw_perf_event *event)
561 {
562 	/* Always place a cycle counter into the cycle counter. */
563 	if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
564 		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
565 			return -EAGAIN;
566 
567 		return ARMV6_CYCLE_COUNTER;
568 	} else {
569 		/*
570 		 * For anything other than a cycle counter, try and use
571 		 * counter0 and counter1.
572 		 */
573 		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
574 			return ARMV6_COUNTER1;
575 
576 		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
577 			return ARMV6_COUNTER0;
578 
579 		/* The counters are all in use. */
580 		return -EAGAIN;
581 	}
582 }
583 
584 static void
armv6pmu_disable_event(struct hw_perf_event * hwc,int idx)585 armv6pmu_disable_event(struct hw_perf_event *hwc,
586 		       int idx)
587 {
588 	unsigned long val, mask, evt, flags;
589 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
590 
591 	if (ARMV6_CYCLE_COUNTER == idx) {
592 		mask	= ARMV6_PMCR_CCOUNT_IEN;
593 		evt	= 0;
594 	} else if (ARMV6_COUNTER0 == idx) {
595 		mask	= ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
596 		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
597 	} else if (ARMV6_COUNTER1 == idx) {
598 		mask	= ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
599 		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
600 	} else {
601 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
602 		return;
603 	}
604 
605 	/*
606 	 * Mask out the current event and set the counter to count the number
607 	 * of ETM bus signal assertion cycles. The external reporting should
608 	 * be disabled and so this should never increment.
609 	 */
610 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
611 	val = armv6_pmcr_read();
612 	val &= ~mask;
613 	val |= evt;
614 	armv6_pmcr_write(val);
615 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
616 }
617 
618 static void
armv6mpcore_pmu_disable_event(struct hw_perf_event * hwc,int idx)619 armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
620 			      int idx)
621 {
622 	unsigned long val, mask, flags, evt = 0;
623 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
624 
625 	if (ARMV6_CYCLE_COUNTER == idx) {
626 		mask	= ARMV6_PMCR_CCOUNT_IEN;
627 	} else if (ARMV6_COUNTER0 == idx) {
628 		mask	= ARMV6_PMCR_COUNT0_IEN;
629 	} else if (ARMV6_COUNTER1 == idx) {
630 		mask	= ARMV6_PMCR_COUNT1_IEN;
631 	} else {
632 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
633 		return;
634 	}
635 
636 	/*
637 	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
638 	 * simply disable the interrupt reporting.
639 	 */
640 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
641 	val = armv6_pmcr_read();
642 	val &= ~mask;
643 	val |= evt;
644 	armv6_pmcr_write(val);
645 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
646 }
647 
armv6_map_event(struct perf_event * event)648 static int armv6_map_event(struct perf_event *event)
649 {
650 	return map_cpu_event(event, &armv6_perf_map,
651 				&armv6_perf_cache_map, 0xFF);
652 }
653 
654 static struct arm_pmu armv6pmu = {
655 	.id			= ARM_PERF_PMU_ID_V6,
656 	.name			= "v6",
657 	.handle_irq		= armv6pmu_handle_irq,
658 	.enable			= armv6pmu_enable_event,
659 	.disable		= armv6pmu_disable_event,
660 	.read_counter		= armv6pmu_read_counter,
661 	.write_counter		= armv6pmu_write_counter,
662 	.get_event_idx		= armv6pmu_get_event_idx,
663 	.start			= armv6pmu_start,
664 	.stop			= armv6pmu_stop,
665 	.map_event		= armv6_map_event,
666 	.num_events		= 3,
667 	.max_period		= (1LLU << 32) - 1,
668 };
669 
armv6pmu_init(void)670 static struct arm_pmu *__init armv6pmu_init(void)
671 {
672 	return &armv6pmu;
673 }
674 
675 /*
676  * ARMv6mpcore is almost identical to single core ARMv6 with the exception
677  * that some of the events have different enumerations and that there is no
678  * *hack* to stop the programmable counters. To stop the counters we simply
679  * disable the interrupt reporting and update the event. When unthrottling we
680  * reset the period and enable the interrupt reporting.
681  */
682 
armv6mpcore_map_event(struct perf_event * event)683 static int armv6mpcore_map_event(struct perf_event *event)
684 {
685 	return map_cpu_event(event, &armv6mpcore_perf_map,
686 				&armv6mpcore_perf_cache_map, 0xFF);
687 }
688 
689 static struct arm_pmu armv6mpcore_pmu = {
690 	.id			= ARM_PERF_PMU_ID_V6MP,
691 	.name			= "v6mpcore",
692 	.handle_irq		= armv6pmu_handle_irq,
693 	.enable			= armv6pmu_enable_event,
694 	.disable		= armv6mpcore_pmu_disable_event,
695 	.read_counter		= armv6pmu_read_counter,
696 	.write_counter		= armv6pmu_write_counter,
697 	.get_event_idx		= armv6pmu_get_event_idx,
698 	.start			= armv6pmu_start,
699 	.stop			= armv6pmu_stop,
700 	.map_event		= armv6mpcore_map_event,
701 	.num_events		= 3,
702 	.max_period		= (1LLU << 32) - 1,
703 };
704 
armv6mpcore_pmu_init(void)705 static struct arm_pmu *__init armv6mpcore_pmu_init(void)
706 {
707 	return &armv6mpcore_pmu;
708 }
709 #else
armv6pmu_init(void)710 static struct arm_pmu *__init armv6pmu_init(void)
711 {
712 	return NULL;
713 }
714 
armv6mpcore_pmu_init(void)715 static struct arm_pmu *__init armv6mpcore_pmu_init(void)
716 {
717 	return NULL;
718 }
719 #endif	/* CONFIG_CPU_V6 || CONFIG_CPU_V6K */
720