1 /*
2 * This file implements the perfmon subsystem which is used
3 * to program the IA-64 Performance Monitoring Unit (PMU).
4 *
5 * Originally written by Ganesh Venkitachalam, IBM Corp.
6 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
7 *
8 * Modifications by Stephane Eranian, Hewlett-Packard Co.
9 * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
10 *
11 * Copyright (C) 1999-2003 Hewlett Packard Co
12 * Stephane Eranian <eranian@hpl.hp.com>
13 * David Mosberger-Tang <davidm@hpl.hp.com>
14 */
15
16 #include <linux/config.h>
17 #include <linux/kernel.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/smp_lock.h>
21 #include <linux/proc_fs.h>
22 #include <linux/init.h>
23 #include <linux/vmalloc.h>
24 #include <linux/wrapper.h>
25 #include <linux/mm.h>
26 #include <linux/sysctl.h>
27 #include <linux/smp.h>
28 #include <linux/seq_file.h>
29
30 #include <asm/bitops.h>
31 #include <asm/errno.h>
32 #include <asm/page.h>
33 #include <asm/perfmon.h>
34 #include <asm/processor.h>
35 #include <asm/signal.h>
36 #include <asm/system.h>
37 #include <asm/uaccess.h>
38 #include <asm/delay.h> /* for ia64_get_itc() */
39
40 #ifdef CONFIG_PERFMON
41
42 /*
43 * For PMUs which rely on the debug registers for some features, you must
44 * enable the following flag to activate the support for
45 * accessing the registers via the perfmonctl() interface.
46 */
47 #if defined(CONFIG_ITANIUM) || defined(CONFIG_MCKINLEY)
48 #define PFM_PMU_USES_DBR 1
49 #endif
50
51 /*
52 * perfmon context states
53 */
54 #define PFM_CTX_DISABLED 0
55 #define PFM_CTX_ENABLED 1
56
57 /*
58 * Reset register flags
59 */
60 #define PFM_PMD_LONG_RESET 1
61 #define PFM_PMD_SHORT_RESET 2
62
63 /*
64 * Misc macros and definitions
65 */
66 #define PMU_FIRST_COUNTER 4
67 #define PMU_MAX_PMCS 256
68 #define PMU_MAX_PMDS 256
69
70 /*
71 * type of a PMU register (bitmask).
72 * bitmask structure:
73 * bit0 : register implemented
74 * bit1 : end marker
75 * bit2-3 : reserved
76 * bit4-7 : register type
77 * bit8-31: reserved
78 */
79 #define PFM_REG_IMPL 0x1 /* register implemented */
80 #define PFM_REG_END 0x2 /* end marker */
81 #define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
82 #define PFM_REG_COUNTING (0x2<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm AND pmc.oi, a PMD used as a counter */
83 #define PFM_REG_CONTROL (0x3<<4|PFM_REG_IMPL) /* PMU control register */
84 #define PFM_REG_CONFIG (0x4<<4|PFM_REG_IMPL) /* refine configuration */
85 #define PFM_REG_BUFFER (0x5<<4|PFM_REG_IMPL) /* PMD used as buffer */
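/*
 * Illustrative sketch (kept out of the build): how the type encoding above
 * can be decoded. Bit 0 flags an implemented register, bit 1 is the end
 * marker, and bits 4-7 select the register class, so masking out the end
 * marker before comparing identifies the class. The helper name is made up
 * for the example.
 */
#if 0
static int pfm_type_is_counting(unsigned int type)
{
	if ((type & PFM_REG_IMPL) == 0) return 0;		/* not implemented */
	return (type & ~PFM_REG_END) == PFM_REG_COUNTING;	/* class in bits 4-7 */
}
#endif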
86
87 #define PMC_IS_LAST(i) (pmu_conf.pmc_desc[i].type & PFM_REG_END)
88 #define PMD_IS_LAST(i) (pmu_conf.pmd_desc[i].type & PFM_REG_END)
89
90 #define PFM_IS_DISABLED() pmu_conf.disabled
91
92 #define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_soft_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
93 #define PFM_FL_INHERIT_MASK (PFM_FL_INHERIT_NONE|PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)
94
95 /* i is assumed to be unsigned */
96 #define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf.pmc_desc[i].type & PFM_REG_IMPL))
97 #define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf.pmd_desc[i].type & PFM_REG_IMPL))
98
99 /* XXX: these three assume that register i is implemented */
100 #define PMD_IS_COUNTING(i) (pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING)
101 #define PMC_IS_COUNTING(i) (pmu_conf.pmc_desc[i].type == PFM_REG_COUNTING)
102 #define PMC_IS_MONITOR(i) (pmu_conf.pmc_desc[i].type == PFM_REG_MONITOR)
103 #define PMC_DFL_VAL(i) pmu_conf.pmc_desc[i].default_value
104 #define PMC_RSVD_MASK(i) pmu_conf.pmc_desc[i].reserved_mask
105 #define PMD_PMD_DEP(i) pmu_conf.pmd_desc[i].dep_pmd[0]
106 #define PMC_PMD_DEP(i) pmu_conf.pmc_desc[i].dep_pmd[0]
107
108 /* k is assumed to be unsigned */
109 #define IBR_IS_IMPL(k) (k<pmu_conf.num_ibrs)
110 #define DBR_IS_IMPL(k) (k<pmu_conf.num_dbrs)
111
112 #define CTX_IS_ENABLED(c) ((c)->ctx_flags.state == PFM_CTX_ENABLED)
113 #define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0)
114 #define CTX_INHERIT_MODE(c) ((c)->ctx_fl_inherit)
115 #define CTX_HAS_SMPL(c) ((c)->ctx_psb != NULL)
116 /* XXX: does not support more than 64 PMDs */
117 #define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
118 #define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
119
120
121 #define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
122 #define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
123 #define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
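/*
 * Usage sketch for the bookkeeping macros above (hypothetical values, kept
 * out of the build; pfm_context_t is defined further down): mark PMD4 and
 * PMD5 as used by a context and test one of them afterwards.
 */
#if 0
static void pfm_mark_used_example(pfm_context_t *ctx)
{
	CTX_USED_PMD(ctx, (1UL << 4) | (1UL << 5));	/* record PMD4 and PMD5 */
	if (CTX_IS_USED_PMD(ctx, 4)) {
		/* PMD4 is now saved/restored on ctxsw and may be read back */
	}
}
#endif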
124
125 #ifdef CONFIG_SMP
126 #define GET_ACTIVATION() pmu_owners[smp_processor_id()].activation_number
127 #define INC_ACTIVATION() pmu_owners[smp_processor_id()].activation_number++
128 #define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION()
129 #define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v)
130 #define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu
131 #else /* !CONFIG_SMP */
132 #define SET_ACTIVATION(t) do {} while(0)
133 #define GET_ACTIVATION(t) do {} while(0)
134 #define INC_ACTIVATION(t) do {} while(0)
135 #define SET_LAST_CPU(ctx, v) do {} while(0)
136 #define GET_LAST_CPU(ctx) do {} while(0)
137 #endif /* CONFIG_SMP */
138
139
140 #define PFM_INVALID_ACTIVATION (~0UL)
141
142 #define SET_PMU_OWNER(t) do { pmu_owners[smp_processor_id()].owner = (t); } while(0)
143 #define PMU_OWNER() pmu_owners[smp_processor_id()].owner
144
145 #define LOCK_PFS() spin_lock(&pfm_sessions.pfs_lock)
146 #define UNLOCK_PFS() spin_unlock(&pfm_sessions.pfs_lock)
147
148 #define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
149
150 #define TASK_PTREGS(t) (((struct pt_regs *)((unsigned long) (t) + IA64_STK_OFFSET))-1)
151
152 /*
153 * cmp0 must be the value of pmc0
154 */
155 #define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL)
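/*
 * On overflow the PMU freezes itself: bit 0 of PMC0 is the freeze bit and
 * the higher bits record which counters overflowed, hence any bit other
 * than bit 0 indicates a pending overflow.
 */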
156
157
158 /*
159 * debugging
160 */
161 #define DBprintk(a) \
162 do { \
163 if (pfm_sysctl.debug >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
164 } while (0)
165
166 #define DBprintk_ovfl(a) \
167 do { \
168 if (pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
169 } while (0)
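/*
 * Usage note: because DBprintk() forwards its argument to printk(), the
 * argument list must be parenthesized twice, e.g.:
 *
 *	DBprintk(("ctx=%p used_pmds=0x%lx\n", ctx, ctx->ctx_used_pmds[0]));
 *
 * The outer parentheses are consumed by the macro, the inner ones form the
 * printk() argument list. Output is gated by pfm_sysctl.debug (and
 * additionally by pfm_sysctl.debug_ovfl for DBprintk_ovfl()).
 */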
170
171
172
173 /*
174 * Architected PMC structure
175 */
176 typedef struct {
177 unsigned long pmc_plm:4; /* privilege level mask */
178 unsigned long pmc_ev:1; /* external visibility */
179 unsigned long pmc_oi:1; /* overflow interrupt */
180 unsigned long pmc_pm:1; /* privileged monitor */
181 unsigned long pmc_ig1:1; /* reserved */
182 unsigned long pmc_es:8; /* event select */
183 unsigned long pmc_ig2:48; /* reserved */
184 } pfm_monitor_t;
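/*
 * Sketch (mirrors the cast done later in pfm_write_pmcs(), kept out of the
 * build): the architected layout above lets a raw 64-bit PMC value be
 * inspected and modified through the bitfields, e.g. to force generation
 * of an overflow interrupt:
 */
#if 0
static void pfm_force_ovfl_intr(unsigned long *val)
{
	pfm_monitor_t *p = (pfm_monitor_t *)val;
	p->pmc_oi = 1;	/* request an interrupt when the counter overflows */
}
#endif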
185
186 /*
187 * There is one such data structure per perfmon context. It is used to describe the
188 * sampling buffer. Unlike the pfm_context, it is meant to be shared among
189 * siblings; therefore we maintain a refcnt which is incremented on fork().
190 * This descriptor is private to the kernel; only the actual sampling
191 * buffer, including its header, is exposed to the user.
192 * This construct allows us to
193 * export the buffer read-write, if needed, without worrying about security
194 * problems.
195 */
196 typedef struct _pfm_smpl_buffer_desc {
197 spinlock_t psb_lock; /* protection lock */
198 unsigned long psb_refcnt; /* how many users for the buffer */
199 int psb_flags; /* bitvector of flags (not yet used) */
200
201 void *psb_addr; /* points to location of first entry */
202 unsigned long psb_entries; /* maximum number of entries */
203 unsigned long psb_size; /* aligned size of buffer */
204 unsigned long psb_index; /* next free entry slot XXX: must use the one in buffer */
205 unsigned long psb_entry_size; /* size of each entry including entry header */
206
207 perfmon_smpl_hdr_t *psb_hdr; /* points to sampling buffer header */
208
209 struct _pfm_smpl_buffer_desc *psb_next; /* next psb, used for rvfreeing of psb_hdr */
210
211 } pfm_smpl_buffer_desc_t;
212
213 /*
214 * psb_flags
215 */
216 #define PSB_HAS_VMA 0x1 /* a virtual mapping for the buffer exists */
217
218 #define LOCK_PSB(p) spin_lock(&(p)->psb_lock)
219 #define UNLOCK_PSB(p) spin_unlock(&(p)->psb_lock)
220
221 /*
222 * 64-bit software counter structure
223 */
224 typedef struct {
225 u64 val; /* virtual 64bit counter value */
226 u64 lval; /* last value */
227 u64 long_reset; /* reset value on sampling overflow */
228 u64 short_reset;/* reset value on overflow */
229 u64 reset_pmds[4]; /* which other pmds to reset when this counter overflows */
230 u64 seed; /* seed for random-number generator */
231 u64 mask; /* mask for random-number generator */
232 unsigned int flags; /* notify/do not notify */
233 } pfm_counter_t;
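/*
 * A counter's 64-bit virtual value is split between 'val' above (the
 * software-maintained part, i.e. the bits beyond the hardware counter
 * width) and the hardware PMD itself (the low bits, bounded by
 * pmu_conf.ovfl_val). pfm_read_soft_counter()/pfm_write_soft_counter()
 * below recombine and split the two halves.
 */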
234
235 /*
236 * perfmon context. There is one per process; it is cloned on fork() depending
237 * on the inheritance flags.
238 */
239 typedef struct {
240 unsigned int state:1; /* 0=disabled, 1=enabled */
241 unsigned int inherit:2; /* inherit mode */
242 unsigned int block:1; /* when 1, task will block on user notifications */
243 unsigned int system:1; /* do system wide monitoring */
244 unsigned int frozen:1; /* pmu must be kept frozen on ctxsw in */
245 unsigned int protected:1; /* allow access to creator of context only */
246 unsigned int using_dbreg:1; /* using range restrictions (debug registers) */
247 unsigned int excl_idle:1; /* exclude idle task in system wide session */
248 unsigned int unsecure:1; /* sp = 0 for non self-monitored task */
249 unsigned int reserved:22;
250 } pfm_context_flags_t;
251
252 /*
253 * perfmon context: encapsulates all the state of a monitoring session
254 * XXX: probably need to change layout
255 */
256 typedef struct pfm_context {
257 pfm_smpl_buffer_desc_t *ctx_psb; /* sampling buffer, if any */
258 unsigned long ctx_smpl_vaddr; /* user level virtual address of smpl buffer */
259
260 spinlock_t ctx_lock;
261 pfm_context_flags_t ctx_flags; /* block/noblock */
262
263 struct task_struct *ctx_notify_task; /* who to notify on overflow */
264 struct task_struct *ctx_owner; /* pid of creator (debug) */
265
266 unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */
267 unsigned long ctx_smpl_regs[4]; /* which registers to record on overflow */
268
269 struct semaphore ctx_restart_sem; /* use for blocking notification mode */
270
271 unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */
272 unsigned long ctx_reload_pmds[4]; /* bitmask of PMD to reload on ctxsw */
273
274 unsigned long ctx_used_pmcs[4]; /* bitmask PMC used by context */
275 unsigned long ctx_reload_pmcs[4]; /* bitmask of PMC to reload on ctxsw */
276
277 unsigned long ctx_used_ibrs[4]; /* bitmask of used IBR (speedup ctxsw) */
278 unsigned long ctx_used_dbrs[4]; /* bitmask of used DBR (speedup ctxsw) */
279
280 pfm_counter_t ctx_soft_pmds[IA64_NUM_PMD_REGS]; /* XXX: size should be dynamic */
281
282 u64 ctx_saved_psr; /* copy of psr used for lazy ctxsw */
283 unsigned long ctx_saved_cpus_allowed; /* copy of the task cpus_allowed (system wide) */
284 unsigned long ctx_last_activation; /* context last activation number for last_cpu */
285 unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */
286 unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */
287
288 struct tasklet_struct ctx_tasklet; /* used for sending signal-based notifications */
289 } pfm_context_t;
290
291 #define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context)
292 #define LOCK_CTX(ctx) spin_lock(&(ctx)->ctx_lock)
293 #define UNLOCK_CTX(ctx) spin_unlock(&(ctx)->ctx_lock)
294
295 #define ctx_fl_inherit ctx_flags.inherit
296 #define ctx_fl_block ctx_flags.block
297 #define ctx_fl_system ctx_flags.system
298 #define ctx_fl_frozen ctx_flags.frozen
299 #define ctx_fl_protected ctx_flags.protected
300 #define ctx_fl_using_dbreg ctx_flags.using_dbreg
301 #define ctx_fl_excl_idle ctx_flags.excl_idle
302 #define ctx_fl_unsecure ctx_flags.unsecure
303
304 /*
305 * global information about all sessions
306 * mostly used to synchronize between system wide and per-process
307 */
308 typedef struct {
309 spinlock_t pfs_lock; /* lock the structure */
310
311 unsigned int pfs_task_sessions; /* number of per task sessions */
312 unsigned int pfs_sys_sessions; /* number of per system wide sessions */
313 unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */
314 unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */
315 struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
316 } pfm_session_t;
317
318 /*
319 * information about a PMC or PMD.
320 * dep_pmd[]: a bitmask of dependent PMD registers
321 * dep_pmc[]: a bitmask of dependent PMC registers
322 */
323 typedef struct {
324 unsigned int type;
325 int pm_pos;
326 unsigned long default_value; /* power-on default value */
327 unsigned long reserved_mask; /* bitmask of reserved bits */
328 int (*read_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
329 int (*write_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
330 unsigned long dep_pmd[4];
331 unsigned long dep_pmc[4];
332 } pfm_reg_desc_t;
333
334 /* assume cnum is a valid monitor */
335 #define PMC_PM(cnum, val) (((val) >> (pmu_conf.pmc_desc[cnum].pm_pos)) & 0x1)
336 #define PMC_WR_FUNC(cnum) (pmu_conf.pmc_desc[cnum].write_check)
337 #define PMD_WR_FUNC(cnum) (pmu_conf.pmd_desc[cnum].write_check)
338 #define PMD_RD_FUNC(cnum) (pmu_conf.pmd_desc[cnum].read_check)
339
340 /*
341 * This structure is initialized at boot time and contains
342 * a description of the PMU main characteristics.
343 */
344 typedef struct {
345 unsigned int disabled; /* indicates if perfmon is working properly */
346 unsigned long ovfl_val; /* overflow value for generic counters */
347 unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */
348 unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */
349 unsigned int num_pmcs; /* number of implemented PMCS */
350 unsigned int num_pmds; /* number of implemented PMDS */
351 unsigned int num_ibrs; /* number of implemented IBRS */
352 unsigned int num_dbrs; /* number of implemented DBRS */
353 unsigned int num_counters; /* number of PMD/PMC counters */
354 pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */
355 pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */
356 } pmu_config_t;
357
358 /*
359 * perfmon command descriptions
360 */
361 typedef struct {
362 int (*cmd_func)(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
363 int cmd_flags;
364 unsigned int cmd_narg;
365 size_t cmd_argsize;
366 } pfm_cmd_desc_t;
367
368 #define PFM_CMD_PID 0x1 /* command requires pid argument */
369 #define PFM_CMD_ARG_READ 0x2 /* command must read argument(s) */
370 #define PFM_CMD_ARG_RW 0x4 /* command must read/write argument(s) */
371 #define PFM_CMD_CTX 0x8 /* command needs a perfmon context */
372 #define PFM_CMD_NOCHK 0x10 /* command does not need to check task's state */
373
374 #define PFM_CMD_IDX(cmd) (cmd)
375
376 #define PFM_CMD_IS_VALID(cmd) ((PFM_CMD_IDX(cmd) >= 0) && (PFM_CMD_IDX(cmd) < PFM_CMD_COUNT) \
377 && pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func != NULL)
378
379 #define PFM_CMD_USE_PID(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_PID) != 0)
380 #define PFM_CMD_READ_ARG(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_READ) != 0)
381 #define PFM_CMD_RW_ARG(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_RW) != 0)
382 #define PFM_CMD_USE_CTX(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_CTX) != 0)
383 #define PFM_CMD_CHK(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_NOCHK) == 0)
384
385 #define PFM_CMD_ARG_MANY -1 /* cannot be zero */
386 #define PFM_CMD_NARG(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_narg)
387 #define PFM_CMD_ARG_SIZE(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_argsize)
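/*
 * Sketch of a plausible consumer of the descriptors above (argument
 * validation along the lines of what the perfmonctl() entry point has to
 * do; kept out of the build, the helper name is made up):
 */
#if 0
static int pfm_check_cmd_args(int cmd, int count)
{
	if (!PFM_CMD_IS_VALID(cmd)) return -EINVAL;
	if (PFM_CMD_NARG(cmd) == PFM_CMD_ARG_MANY) {
		if (count <= 0) return -EINVAL;		/* variable-length argument vector */
	} else if (count != PFM_CMD_NARG(cmd)) {
		return -EINVAL;				/* fixed argument count */
	}
	return 0;
}
#endif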
388
389 typedef struct {
390 int debug; /* turn on/off debugging via syslog */
391 int debug_ovfl; /* turn on/off debug printk in overflow handler */
392 int fastctxsw; /* turn on/off fast (unsecure) ctxsw */
393 } pfm_sysctl_t;
394
395 typedef struct {
396 unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
397 unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
398 unsigned long pfm_recorded_samples_count;
399 unsigned long pfm_full_smpl_buffer_count; /* how many times the sampling buffer was full */
400 char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
401 } pfm_stats_t;
402
403 /*
404 * perfmon internal variables
405 */
406 static pfm_session_t pfm_sessions; /* global sessions information */
407 static struct proc_dir_entry *perfmon_dir; /* for debug only */
408 static pfm_stats_t pfm_stats[NR_CPUS];
409 static pfm_intr_handler_desc_t *pfm_alternate_intr_handler;
410
411 /* sysctl() controls */
412 static pfm_sysctl_t pfm_sysctl;
413
414 static ctl_table pfm_ctl_table[]={
415 {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
416 {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
417 {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
418 { 0, },
419 };
420 static ctl_table pfm_sysctl_dir[] = {
421 {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
422 {0,},
423 };
424 static ctl_table pfm_sysctl_root[] = {
425 {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
426 {0,},
427 };
428 static struct ctl_table_header *pfm_sysctl_header;
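/*
 * The three tables above nest as kernel/perfmon/{debug,debug_ovfl,fastctxsw},
 * so once pfm_sysctl_header is registered at init time the knobs appear as
 * /proc/sys/kernel/perfmon/debug, etc.
 */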
429
430 static void pfm_vm_close(struct vm_area_struct * area);
431
432 static struct vm_operations_struct pfm_vm_ops={
433 .close = pfm_vm_close
434 };
435
436 /*
437 * keep track of task owning the PMU per CPU.
438 */
439 static struct {
440 struct task_struct *owner;
441 unsigned long activation_number;
442 char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
443 } pmu_owners[NR_CPUS];
444
445
446
447 /*
448 * forward declarations
449 */
450 static void pfm_reset_pmu(struct task_struct *);
451 #ifndef CONFIG_SMP
452 static unsigned long pfm_lazy_save_regs (struct task_struct *ta);
453 #endif
454
455 #if defined(CONFIG_ITANIUM)
456 #include "perfmon_itanium.h"
457 #elif defined(CONFIG_MCKINLEY)
458 #include "perfmon_mckinley.h"
459 #else
460 #include "perfmon_generic.h"
461 #endif
462
463 static inline void
464 pfm_clear_psr_pp(void)
465 {
466 __asm__ __volatile__ ("rsm psr.pp;; srlz.i;;"::: "memory");
467 }
468
469 static inline void
470 pfm_set_psr_pp(void)
471 {
472 __asm__ __volatile__ ("ssm psr.pp;; srlz.i;;"::: "memory");
473 }
474
475 static inline void
476 pfm_clear_psr_up(void)
477 {
478 __asm__ __volatile__ ("rsm psr.up;; srlz.i;;"::: "memory");
479 }
480
481 static inline void
482 pfm_set_psr_up(void)
483 {
484 __asm__ __volatile__ ("ssm psr.up;; srlz.i;;"::: "memory");
485 }
486
487 static inline unsigned long
488 pfm_get_psr(void)
489 {
490 unsigned long tmp;
491 __asm__ __volatile__ ("mov %0=psr;;": "=r"(tmp) :: "memory");
492 return tmp;
493 }
494
495 static inline void
496 pfm_set_psr_l(unsigned long val)
497 {
498 __asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(val): "memory");
499 }
500
501
502
503 static inline void
504 pfm_freeze_pmu(void)
505 {
506 ia64_set_pmc(0,1UL);
507 ia64_srlz_d();
508 }
509
510 static inline void
511 pfm_unfreeze_pmu(void)
512 {
513 ia64_set_pmc(0,0UL);
514 ia64_srlz_d();
515 }
516
517 static inline void
518 pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
519 {
520 int i;
521
522 for (i=0; i < nibrs; i++) {
523 ia64_set_ibr(i, ibrs[i]);
524 }
525 ia64_srlz_i();
526 }
527
528 static inline void
529 pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
530 {
531 int i;
532
533 for (i=0; i < ndbrs; i++) {
534 ia64_set_dbr(i, dbrs[i]);
535 }
536 ia64_srlz_d();
537 }
538
539 static inline void
540 pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
541 {
542 int i;
543
544 DBprintk(("mask=0x%lx\n", mask));
545 for (i=0; mask; i++, mask>>=1) {
546 if ((mask & 0x1) == 0) continue;
547 ia64_set_pmc(i, pmcs[i]);
548 DBprintk(("pmc[%d]=0x%lx\n", i, pmcs[i]));
549 }
550 ia64_srlz_d();
551 }
552
553 static inline void
554 pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
555 {
556 int i;
557 unsigned long val, ovfl_val = pmu_conf.ovfl_val;
558
559 DBprintk(("mask=0x%lx\n", mask));
560 for (i=0; mask; i++, mask>>=1) {
561 if ((mask & 0x1) == 0) continue;
562 val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
563 ia64_set_pmd(i, val);
564 DBprintk(("pmd[%d]=0x%lx\n", i, val));
565 }
566 ia64_srlz_d();
567 }
568
569 static inline void
570 pfm_save_pmds(unsigned long *pmds, unsigned long mask)
571 {
572 int i;
573
574 ia64_srlz_d();
575
576 for (i=0; mask; i++, mask>>=1) {
577 if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
578 }
579 }
580
581 static inline unsigned long
582 pfm_read_soft_counter(pfm_context_t *ctx, int i)
583 {
584 return ctx->ctx_soft_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.ovfl_val);
585 }
586
587 static inline void
588 pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
589 {
590 ctx->ctx_soft_pmds[i].val = val & ~pmu_conf.ovfl_val;
591 /*
592 * writes to the unimplemented part are ignored, so we do not need to
593 * mask off the top part
594 */
595 ia64_set_pmd(i, val & pmu_conf.ovfl_val);
596 }
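/*
 * Round-trip example (assuming a 47-bit hardware counter, i.e.
 * ovfl_val = (1UL<<47)-1): writing the virtual value 0x1000000000100
 * stores 0x1000000000000 in ctx_soft_pmds[i].val and 0x100 in the
 * hardware PMD; pfm_read_soft_counter() then returns their sum, i.e.
 * the original 64-bit value.
 */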
597
598 /*
599 * Generates a unique (per CPU) timestamp
600 */
601 static inline unsigned long
602 pfm_get_stamp(void)
603 {
604 /*
605 * XXX: must find something more efficient
606 */
607 return ia64_get_itc();
608 }
609
610 /* Here we want the physical address of the memory.
611 * This is used when initializing the contents of the
612 * area and marking the pages as reserved.
613 */
614 static inline unsigned long
615 pfm_kvirt_to_pa(unsigned long adr)
616 {
617 __u64 pa = ia64_tpa(adr);
618 //DBprintk(("kv2pa(%lx-->%lx)\n", adr, pa));
619 return pa;
620 }
621
622 static void *
623 pfm_rvmalloc(unsigned long size)
624 {
625 void *mem;
626 unsigned long adr, page;
627
628 mem=vmalloc(size);
629 if (mem) {
630 //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
631 memset(mem, 0, size); /* Clear the ram out, no junk to the user */
632 adr=(unsigned long) mem;
633 while (size > 0) {
634 page = pfm_kvirt_to_pa(adr);
635 mem_map_reserve(virt_to_page(__va(page)));
636 adr += PAGE_SIZE;
637 size -= PAGE_SIZE;
638 }
639 }
640 return mem;
641 }
642
643 static void
644 pfm_rvfree(void *mem, unsigned long size)
645 {
646 unsigned long adr, page = 0;
647
648 if (mem) {
649 adr=(unsigned long) mem;
650 while (size > 0) {
651 page = pfm_kvirt_to_pa(adr);
652 mem_map_unreserve(virt_to_page(__va(page)));
653 adr+=PAGE_SIZE;
654 size-=PAGE_SIZE;
655 }
656 vfree(mem);
657 }
658 return;
659 }
660
661 /*
662 * This function gets called from mm/mmap.c:exit_mmap() only when there is a sampling buffer
663 * attached to the context AND the current task has a mapping for it, i.e., it is the original
664 * creator of the context.
665 *
666 * This function is used to remember the fact that the vma describing the sampling buffer
667 * has now been removed. It can only be called when no other tasks share the same mm context.
668 *
669 */
670 static void
671 pfm_vm_close(struct vm_area_struct *vma)
672 {
673 pfm_smpl_buffer_desc_t *psb = (pfm_smpl_buffer_desc_t *)vma->vm_private_data;
674
675 if (psb == NULL) {
676 printk(KERN_DEBUG "perfmon: psb is null in [%d]\n", current->pid);
677 return;
678 }
679 /*
680 * Add PSB to list of buffers to free on release_thread() when no more users
681 *
682 * This call is safe because, once the count is zero, it cannot be modified anymore.
683 * The fact that there are no more users of the mm context does not mean the sampling
684 * buffer is no longer being used outside of this task. In fact, it can still
685 * be accessed from within the kernel by another task (such as the monitored task).
686 *
687 * Therefore, we only move the psb into the list of buffers to free when we know
688 * nobody else is using it.
689 * The linked list is independent of the perfmon context, because in the case of
690 * multi-threaded processes, the last thread may not have been involved with
691 * monitoring; however, it will be the one removing the vma and it should therefore
692 * also remove the sampling buffer. This buffer cannot be removed until the vma
693 * is removed.
694 *
695 * This function cannot remove the buffer from here, because exit_mmap() must first
696 * complete. Given that there is no other vma related callback in the generic code,
697 * we have created our own with the linked list of sampling buffers to free. The list
698 * is part of the thread structure. In release_thread() we check if the list is
699 * empty. If not we call into perfmon to free the buffer and psb. That is the only
700 * way to ensure a safe deallocation of the sampling buffer which works when
701 * the buffer is shared between distinct processes or with multi-threaded programs.
702 *
703 * We need to lock the psb because the refcnt test and flag manipulation must
704 * look like an atomic operation with respect to pfm_context_exit()
705 */
706 LOCK_PSB(psb);
707
708 if (psb->psb_refcnt == 0) {
709
710 psb->psb_next = current->thread.pfm_smpl_buf_list;
711 current->thread.pfm_smpl_buf_list = psb;
712
713 DBprintk(("[%d] add smpl @%p size %lu to smpl_buf_list psb_flags=0x%x\n",
714 current->pid, psb->psb_hdr, psb->psb_size, psb->psb_flags));
715 }
716 DBprintk(("[%d] clearing psb_flags=0x%x smpl @%p size %lu\n",
717 current->pid, psb->psb_flags, psb->psb_hdr, psb->psb_size));
718 /*
719 * mark that the buffer no longer has a vma mapping it
720 */
721 psb->psb_flags &= ~PSB_HAS_VMA;
722
723 UNLOCK_PSB(psb);
724 }
725
726 /*
727 * This function is called from pfm_destroy_context() and also from pfm_inherit()
728 * to explicitly remove the sampling buffer mapping from the user level address space.
729 */
730 static int
731 pfm_remove_smpl_mapping(struct task_struct *task)
732 {
733 pfm_context_t *ctx = task->thread.pfm_context;
734 pfm_smpl_buffer_desc_t *psb;
735 int r;
736
737 /*
738 * some sanity checks first
739 */
740 if (ctx == NULL || task->mm == NULL || ctx->ctx_smpl_vaddr == 0 || ctx->ctx_psb == NULL) {
741 printk(KERN_DEBUG "perfmon: invalid context mm=%p\n", task->mm);
742 return -1;
743 }
744 psb = ctx->ctx_psb;
745
746 down_write(&task->mm->mmap_sem);
747
748 r = do_munmap(task->mm, ctx->ctx_smpl_vaddr, psb->psb_size);
749
750 up_write(&task->mm->mmap_sem);
751 if (r !=0) {
752 printk(KERN_DEBUG "perfmon: pid %d unable to unmap sampling buffer "
753 "@0x%lx size=%ld\n", task->pid, ctx->ctx_smpl_vaddr, psb->psb_size);
754 }
755
756 DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d refcnt=%lu psb_flags=0x%x\n",
757 task->pid, ctx->ctx_smpl_vaddr, psb->psb_size, r, psb->psb_refcnt, psb->psb_flags));
758
759 return 0;
760 }
761
762 static pfm_context_t *
763 pfm_context_alloc(void)
764 {
765 pfm_context_t *ctx;
766
767 /* allocate context descriptor */
768 ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
769 if (ctx) memset(ctx, 0, sizeof(pfm_context_t));
770
771 return ctx;
772 }
773
774 static void
775 pfm_context_free(pfm_context_t *ctx)
776 {
777 if (ctx) {
778 DBprintk(("kill tasklet for ctx %p\n", ctx));
779
780 tasklet_kill(&ctx->ctx_tasklet);
781
782 DBprintk(("free ctx @%p\n", ctx));
783 kfree(ctx);
784 }
785 }
786
787 static int
788 pfm_remap_buffer(unsigned long buf, unsigned long addr, unsigned long size)
789 {
790 unsigned long page;
791
792 DBprintk(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
793
794 while (size > 0) {
795 page = pfm_kvirt_to_pa(buf);
796
797 if (remap_page_range(addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM;
798
799 addr += PAGE_SIZE;
800 buf += PAGE_SIZE;
801 size -= PAGE_SIZE;
802 }
803 return 0;
804 }
805
806 /*
807 * counts the number of PMDS to save per entry.
808 * This code is generic enough to accommodate more than 64 PMDS when they become available
809 */
810 static unsigned long
811 pfm_smpl_entry_size(unsigned long *which, unsigned long size)
812 {
813 unsigned long res = 0;
814 int i;
815
816 for (i=0; i < size; i++, which++) res += hweight64(*which);
817
818 DBprintk(("weight=%ld\n", res));
819
820 return res;
821 }
822
823 /*
824 * Allocates the sampling buffer and remaps it into caller's address space
825 */
826 static int
827 pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned long entries,
828 void **user_vaddr)
829 {
830 struct mm_struct *mm = current->mm;
831 struct vm_area_struct *vma = NULL;
832 unsigned long size, regcount;
833 void *smpl_buf;
834 pfm_smpl_buffer_desc_t *psb;
835
836
837 /* note that regcount might be 0, in which case only the header for each
838 * entry will be recorded.
839 */
840 regcount = pfm_smpl_entry_size(which_pmds, 1);
841
842 if ((sizeof(perfmon_smpl_hdr_t)+ entries*sizeof(perfmon_smpl_entry_t)) <= entries) {
843 DBprintk(("requested entries %lu is too big\n", entries));
844 return -EINVAL;
845 }
846
847 /*
848 * 1 buffer hdr and for each entry a header + regcount PMDs to save
849 */
850 size = PAGE_ALIGN( sizeof(perfmon_smpl_hdr_t)
851 + entries * (sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64)));
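/*
 * Worked example with hypothetical numbers: for regcount = 2 and
 * entries = 1024, the request is sizeof(perfmon_smpl_hdr_t) plus
 * 1024 * (sizeof(perfmon_smpl_entry_t) + 2*sizeof(u64)) bytes, rounded
 * up to a page boundary by PAGE_ALIGN().
 */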
852
853 DBprintk(("sampling buffer size=%lu bytes\n", size));
854
855 /*
856 * check requested size to avoid Denial-of-service attacks
857 * XXX: may have to refine this test
858 * Check against address space limit.
859 *
860 * if ((mm->total_vm << PAGE_SHIFT) + len> current->rlim[RLIMIT_AS].rlim_cur)
861 * return -ENOMEM;
862 */
863 if (size > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN;
864
865 /*
866 * We do the easy to undo allocations first.
867 *
868 * pfm_rvmalloc() clears the buffer, so there is no leak
869 */
870 smpl_buf = pfm_rvmalloc(size);
871 if (smpl_buf == NULL) {
872 DBprintk(("Can't allocate sampling buffer\n"));
873 return -ENOMEM;
874 }
875
876 DBprintk(("smpl_buf @%p\n", smpl_buf));
877
878 /* allocate sampling buffer descriptor now */
879 psb = kmalloc(sizeof(*psb), GFP_KERNEL);
880 if (psb == NULL) {
881 DBprintk(("Can't allocate sampling buffer descriptor\n"));
882 goto error_kmalloc;
883 }
884
885 /* allocate vma */
886 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
887 if (!vma) {
888 DBprintk(("Cannot allocate vma\n"));
889 goto error_kmem;
890 }
891 memset(vma, 0, sizeof(*vma));
892
893 /*
894 * partially initialize the vma for the sampling buffer
895 *
896 * The VM_DONTCOPY flag is very important as it ensures that the mapping
897 * will never be inherited for any child process (via fork()) which is always
898 * what we want.
899 */
900 vma->vm_mm = mm;
901 vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED|VM_DONTCOPY;
902 vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
903 vma->vm_ops = &pfm_vm_ops; /* necessary to get the close() callback */
904 vma->vm_pgoff = 0;
905 vma->vm_file = NULL;
906 vma->vm_raend = 0;
907 vma->vm_private_data = psb; /* information needed by the pfm_vm_close() function */
908
909 /*
910 * Now we have everything we need and we can initialize
911 * and connect all the data structures
912 */
913
914 psb->psb_hdr = smpl_buf;
915 psb->psb_addr = ((char *)smpl_buf)+sizeof(perfmon_smpl_hdr_t); /* first entry */
916 psb->psb_size = size; /* aligned size */
917 psb->psb_index = 0;
918 psb->psb_entries = entries;
919 psb->psb_refcnt = 1;
920 psb->psb_flags = PSB_HAS_VMA;
921
922 spin_lock_init(&psb->psb_lock);
923
924 /*
925 * XXX: will need to do cacheline alignment to avoid false sharing in SMP mode and
926 * multitask monitoring.
927 */
928 psb->psb_entry_size = sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64);
929
930 DBprintk(("psb @%p entry_size=%ld hdr=%p addr=%p refcnt=%lu psb_flags=0x%x\n",
931 (void *)psb,psb->psb_entry_size, (void *)psb->psb_hdr,
932 (void *)psb->psb_addr, psb->psb_refcnt, psb->psb_flags));
933
934 /* initialize some of the fields of user visible buffer header */
935 psb->psb_hdr->hdr_version = PFM_SMPL_VERSION;
936 psb->psb_hdr->hdr_entry_size = psb->psb_entry_size;
937 psb->psb_hdr->hdr_pmds[0] = which_pmds[0];
938
939 /*
940 * Let's do the difficult operations next.
941 *
942 * now we atomically find some area in the address space and
943 * remap the buffer in it.
944 */
945 down_write(¤t->mm->mmap_sem);
946
947
948 /* find some free area in address space, must have mmap sem held */
949 vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS);
950 if (vma->vm_start == 0UL) {
951 DBprintk(("Cannot find unmapped area for size %ld\n", size));
952 up_write(¤t->mm->mmap_sem);
953 goto error;
954 }
955 vma->vm_end = vma->vm_start + size;
956
957 DBprintk(("entries=%ld aligned size=%ld, unmapped @0x%lx\n", entries, size, vma->vm_start));
958
959 /* can only be applied to current, need to have the mm semaphore held when called */
960 if (pfm_remap_buffer((unsigned long)smpl_buf, vma->vm_start, size)) {
961 DBprintk(("Can't remap buffer\n"));
962 up_write(¤t->mm->mmap_sem);
963 goto error;
964 }
965
966 /*
967 * now insert the vma in the vm list for the process, must be
968 * done with mmap lock held
969 */
970 insert_vm_struct(mm, vma);
971
972 mm->total_vm += size >> PAGE_SHIFT;
973
974 up_write(¤t->mm->mmap_sem);
975
976 /* store which PMDS to record */
977 ctx->ctx_smpl_regs[0] = which_pmds[0];
978
979
980 /* link to perfmon context */
981 ctx->ctx_psb = psb;
982
983 /*
984 * keep track of user level virtual address
985 */
986 ctx->ctx_smpl_vaddr = *(unsigned long *)user_vaddr = vma->vm_start;
987
988 return 0;
989
990 error:
991 kmem_cache_free(vm_area_cachep, vma);
992 error_kmem:
993 kfree(psb);
994 error_kmalloc:
995 pfm_rvfree(smpl_buf, size);
996 return -ENOMEM;
997 }
998
999 static int
1000 pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask)
1001 {
1002 unsigned long m, undo_mask;
1003 unsigned int n, i;
1004
1005 /*
1006 * validity checks on cpu_mask have been done upstream
1007 */
1008 LOCK_PFS();
1009
1010 if (is_syswide) {
1011 /*
1012 * cannot mix system wide and per-task sessions
1013 */
1014 if (pfm_sessions.pfs_task_sessions > 0UL) {
1015 DBprintk(("system wide not possible, %u conflicting task_sessions\n",
1016 pfm_sessions.pfs_task_sessions));
1017 goto abort;
1018 }
1019
1020 m = cpu_mask; undo_mask = 0UL; n = 0;
1021 DBprintk(("cpu_mask=0x%lx\n", cpu_mask));
1022 for(i=0; m; i++, m>>=1) {
1023
1024 if ((m & 0x1) == 0UL) continue;
1025
1026 if (pfm_sessions.pfs_sys_session[i]) goto undo;
1027
1028 DBprintk(("reserving CPU%d currently on CPU%d\n", i, smp_processor_id()));
1029
1030 pfm_sessions.pfs_sys_session[i] = task;
1031 undo_mask |= 1UL << i;
1032 n++;
1033 }
1034 pfm_sessions.pfs_sys_sessions += n;
1035 } else {
1036 if (pfm_sessions.pfs_sys_sessions) goto abort;
1037 pfm_sessions.pfs_task_sessions++;
1038 }
1039 UNLOCK_PFS();
1040 return 0;
1041 undo:
1042 DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n",
1043 pfm_sessions.pfs_sys_session[i]->pid, i));
1044
1045 for(i=0; undo_mask; i++, undo_mask >>=1) {
1046 pfm_sessions.pfs_sys_session[i] = NULL;
1047 }
1048 abort:
1049 UNLOCK_PFS();
1050
1051 return -EBUSY;
1052
1053 }
1054
1055 static int
1056 pfm_unreserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask)
1057 {
1058 pfm_context_t *ctx;
1059 unsigned long m;
1060 unsigned int n, i;
1061
1062 ctx = task ? task->thread.pfm_context : NULL;
1063
1064 /*
1065 * validity checks on cpu_mask have been done upstream
1066 */
1067 LOCK_PFS();
1068
1069 DBprintk(("[%d] sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu_mask=0x%lx\n",
1070 task->pid,
1071 pfm_sessions.pfs_sys_sessions,
1072 pfm_sessions.pfs_task_sessions,
1073 pfm_sessions.pfs_sys_use_dbregs,
1074 is_syswide,
1075 cpu_mask));
1076
1077
1078 if (is_syswide) {
1079 m = cpu_mask; n = 0;
1080 for(i=0; m; i++, m>>=1) {
1081 if ((m & 0x1) == 0UL) continue;
1082 pfm_sessions.pfs_sys_session[i] = NULL;
1083 n++;
1084 }
1085 /*
1086 * note: this accounting would not work if more than one bit were set in cpu_mask
1087 */
1088 if (ctx && ctx->ctx_fl_using_dbreg) {
1089 if (pfm_sessions.pfs_sys_use_dbregs == 0) {
1090 printk(KERN_DEBUG "perfmon: invalid release for [%d] "
1091 "sys_use_dbregs=0\n", task->pid);
1092 } else {
1093 pfm_sessions.pfs_sys_use_dbregs--;
1094 }
1095 }
1096 pfm_sessions.pfs_sys_sessions -= n;
1097
1098 DBprintk(("CPU%d sys_sessions=%u\n",
1099 smp_processor_id(), pfm_sessions.pfs_sys_sessions));
1100 } else {
1101 pfm_sessions.pfs_task_sessions--;
1102 DBprintk(("[%d] task_sessions=%u\n",
1103 task->pid, pfm_sessions.pfs_task_sessions));
1104 }
1105
1106 UNLOCK_PFS();
1107
1108 return 0;
1109 }
1110
1111 static void
1112 pfm_send_notification_signal(unsigned long data)
1113 {
1114 pfm_context_t *ctx = (pfm_context_t *)data;
1115 struct siginfo si;
1116 int ret;
1117
1118 DBprintk(("[%d] tasklet called\n", current->pid));
1119
1120 LOCK_CTX(ctx);
1121
1122 if (ctx->ctx_notify_task == NULL) {
1123 printk(KERN_INFO "perfmon: tasklet lost notify_task\n");
1124 goto nothing_to_do;
1125 }
1126 /* no leak */
1127 memset(&si,0, sizeof(si));
1128
1129 si.si_addr = NULL;
1130 si.si_pid = current->pid; /* irrelevant */
1131 si.si_signo = SIGPROF;
1132 si.si_code = PROF_OVFL; /* indicates a perfmon SIGPROF signal */
1133 si.si_pfm_ovfl[0] = ctx->ctx_ovfl_regs[0];
1134
1135 if (ctx->ctx_notify_task != current) read_lock(&tasklist_lock);
1136
1137 DBprintk_ovfl(("[%d] tasklet sending notification to [%d]\n", current->pid, ctx->ctx_notify_task->pid));
1138
1139 ret = send_sig_info(SIGPROF, &si, ctx->ctx_notify_task);
1140 if (ret != 0) printk(KERN_ERR "send_sig_info(process %d, SIGPROF)=%d\n", ctx->ctx_notify_task->pid, ret);
1141
1142 /*
1143 * now undo the protections in order
1144 */
1145 if (ctx->ctx_notify_task != current) read_unlock(&tasklist_lock);
1146 nothing_to_do:
1147 UNLOCK_CTX(ctx);
1148 }
1149
1150 /*
1151 * XXX: do something better here
1152 */
1153 static int
1154 pfm_bad_permissions(struct task_struct *task)
1155 {
1156 /* stolen from bad_signal() */
1157 return (current->session != task->session)
1158 && (current->euid ^ task->suid) && (current->euid ^ task->uid)
1159 && (current->uid ^ task->suid) && (current->uid ^ task->uid);
1160 }
1161
1162 static int
1163 pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx)
1164 {
1165 unsigned long smpl_pmds = pfx->ctx_smpl_regs[0];
1166 int ctx_flags;
1167 int cpu;
1168
1169 /* valid signal */
1170
1171 /* cannot send to process 1, 0 means do not notify */
1172 if (pfx->ctx_notify_pid == 1) {
1173 DBprintk(("invalid notify_pid %d\n", pfx->ctx_notify_pid));
1174 return -EINVAL;
1175 }
1176 ctx_flags = pfx->ctx_flags;
1177
1178 if ((ctx_flags & PFM_FL_INHERIT_MASK) == (PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)) {
1179 DBprintk(("invalid inherit mask 0x%x\n",ctx_flags & PFM_FL_INHERIT_MASK));
1180 return -EINVAL;
1181 }
1182
1183 if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
1184 DBprintk(("cpu_mask=0x%lx\n", pfx->ctx_cpu_mask));
1185 /*
1186 * cannot block in this mode
1187 */
1188 if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
1189 DBprintk(("cannot use blocking mode when in system wide monitoring\n"));
1190 return -EINVAL;
1191 }
1192 /*
1193 * must only have one bit set in the CPU mask
1194 */
1195 if (hweight64(pfx->ctx_cpu_mask) != 1UL) {
1196 DBprintk(("invalid CPU mask specified\n"));
1197 return -EINVAL;
1198 }
1199 /*
1200 * and it must be a valid CPU
1201 */
1202 cpu = ffz(~pfx->ctx_cpu_mask);
1203 if (cpu_online(cpu) == 0) {
1204 DBprintk(("CPU%d is not online\n", cpu));
1205 return -EINVAL;
1206 }
1207 /*
1208 * check for pre-existing pinning, if conflicting reject
1209 */
1210 if (task->cpus_allowed != ~0UL && (task->cpus_allowed & (1UL<<cpu)) == 0) {
1211 DBprintk(("[%d] pinned on 0x%lx, mask for CPU%d \n", task->pid,
1212 task->cpus_allowed, cpu));
1213 return -EINVAL;
1214 }
1215
1216 } else {
1217 /*
1218 * must provide a target for the signal in blocking mode even when
1219 * no counter is configured with PFM_FL_REG_OVFL_NOTIFY
1220 */
1221 if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == 0) {
1222 DBprintk(("must have notify_pid when blocking for [%d]\n", task->pid));
1223 return -EINVAL;
1224 }
1225 #if 0
1226 if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == task->pid) {
1227 DBprintk(("cannot notify self when blocking for [%d]\n", task->pid));
1228 return -EINVAL;
1229 }
1230 #endif
1231 }
1232 /* verify validity of smpl_regs */
1233 if ((smpl_pmds & pmu_conf.impl_pmds[0]) != smpl_pmds) {
1234 DBprintk(("invalid smpl_regs 0x%lx\n", smpl_pmds));
1235 return -EINVAL;
1236 }
1237 /* probably more to add here */
1238
1239 return 0;
1240 }
1241
1242 static int
1243 pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int count,
1244 struct pt_regs *regs)
1245 {
1246 pfarg_context_t tmp;
1247 void *uaddr = NULL;
1248 int ret;
1249 int ctx_flags;
1250 pid_t notify_pid;
1251
1252 /* a context has already been defined */
1253 if (ctx) return -EBUSY;
1254
1255 /*
1256 * not yet supported
1257 */
1258 if (task != current) return -EINVAL;
1259
1260 if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
1261
1262 ret = pfx_is_sane(task, &tmp);
1263 if (ret < 0) return ret;
1264
1265 ctx_flags = tmp.ctx_flags;
1266
1267 ret = pfm_reserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE, tmp.ctx_cpu_mask);
1268 if (ret) goto abort;
1269
1270 ret = -ENOMEM;
1271
1272 ctx = pfm_context_alloc();
1273 if (!ctx) goto error;
1274
1275 /* record the creator (important for inheritance) */
1276 ctx->ctx_owner = current;
1277
1278 notify_pid = tmp.ctx_notify_pid;
1279
1280 spin_lock_init(&ctx->ctx_lock);
1281
1282 if (notify_pid == current->pid) {
1283
1284 ctx->ctx_notify_task = current;
1285 task->thread.pfm_context = ctx;
1286
1287 } else if (notify_pid!=0) {
1288 struct task_struct *notify_task;
1289
1290 read_lock(&tasklist_lock);
1291
1292 notify_task = find_task_by_pid(notify_pid);
1293
1294 if (notify_task) {
1295
1296 ret = -EPERM;
1297
1298 /*
1299 * check if we can send this task a signal
1300 */
1301 if (pfm_bad_permissions(notify_task)) {
1302 read_unlock(&tasklist_lock);
1303 goto buffer_error;
1304 }
1305
1306 /*
1307 * make visible
1308 * must be done inside critical section
1309 *
1310 * if the initialization does not go through, it is still
1311 * okay because the child will do the scan for nothing, which
1312 * won't hurt.
1313 */
1314 task->thread.pfm_context = ctx;
1315
1316 /*
1317 * will cause task to check on exit for monitored
1318 * processes that would notify it. see release_thread()
1319 * Note: the scan MUST be done in release thread, once the
1320 * task has been detached from the tasklist otherwise you are
1321 * exposed to race conditions.
1322 */
1323 atomic_add(1, &ctx->ctx_notify_task->thread.pfm_notifiers_check);
1324
1325 ctx->ctx_notify_task = notify_task;
1326 }
1327 read_unlock(&tasklist_lock);
1328 }
1329
1330 /*
1331 * notification process does not exist
1332 */
1333 if (notify_pid != 0 && ctx->ctx_notify_task == NULL) {
1334 ret = -EINVAL;
1335 goto buffer_error;
1336 }
1337
1338 if (tmp.ctx_smpl_entries) {
1339 DBprintk(("sampling entries=%lu\n",tmp.ctx_smpl_entries));
1340
1341 ret = pfm_smpl_buffer_alloc(ctx, tmp.ctx_smpl_regs,
1342 tmp.ctx_smpl_entries, &uaddr);
1343 if (ret<0) goto buffer_error;
1344
1345 tmp.ctx_smpl_vaddr = uaddr;
1346 }
1347 /* initialization of context's flags */
1348 ctx->ctx_fl_inherit = ctx_flags & PFM_FL_INHERIT_MASK;
1349 ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
1350 ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
1351 ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
1352 ctx->ctx_fl_unsecure = (ctx_flags & PFM_FL_UNSECURE) ? 1: 0;
1353 ctx->ctx_fl_frozen = 0;
1354 /*
1355 * setting this flag to 0 here means that both the creator and the task to which
1356 * the context is being attached are granted access. Given that a context can only
1357 * be created for the calling process, this in effect only allows the creator
1358 * to access the context. See pfm_protect() for more.
1359 */
1360 ctx->ctx_fl_protected = 0;
1361
1362 /* for system wide mode only (only 1 bit set) */
1363 ctx->ctx_cpu = ffz(~tmp.ctx_cpu_mask);
1364
1365 /* SMP only, means no CPU */
1366 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
1367 SET_LAST_CPU(ctx, -1);
1368
1369 sema_init(&ctx->ctx_restart_sem, 0); /* init this semaphore to locked */
1370
1371 /*
1372 * initialize tasklet for signal notifications
1373 *
1374 * ALL signal-based (or any notification using data structures
1375 * external to perfmon) MUST use tasklets to avoid lock contentions
1376 * when a signal has to be sent for overflow interrupt handler.
1377 */
1378 tasklet_init(&ctx->ctx_tasklet, pfm_send_notification_signal, (unsigned long)ctx);
1379
1380 if (__copy_to_user(req, &tmp, sizeof(tmp))) {
1381 ret = -EFAULT;
1382 goto buffer_error;
1383 }
1384
1385 DBprintk(("context=%p, pid=%d notify_task=%p\n",
1386 (void *)ctx, task->pid, ctx->ctx_notify_task));
1387
1388 DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d excl_idle=%d unsecure=%d\n",
1389 (void *)ctx, task->pid, ctx_flags, ctx->ctx_fl_inherit,
1390 ctx->ctx_fl_block, ctx->ctx_fl_system,
1391 ctx->ctx_fl_excl_idle,
1392 ctx->ctx_fl_unsecure));
1393
1394 /*
1395 * when no notification is required, we can make this visible at the last moment
1396 */
1397 if (notify_pid == 0) task->thread.pfm_context = ctx;
1398 /*
1399 * pin task to CPU and force reschedule on exit to ensure
1400 * that when back to user level the task runs on the designated
1401 * CPU.
1402 */
1403 if (ctx->ctx_fl_system) {
1404 ctx->ctx_saved_cpus_allowed = task->cpus_allowed;
1405 task->cpus_allowed = tmp.ctx_cpu_mask;
1406 task->need_resched = 1;
1407 DBprintk(("[%d] rescheduled allowed=0x%lx\n", task->pid, task->cpus_allowed));
1408 }
1409
1410 return 0;
1411
1412 buffer_error:
1413 pfm_context_free(ctx);
1414 error:
1415 pfm_unreserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE , tmp.ctx_cpu_mask);
1416 abort:
1417 /* make sure we don't leave anything behind */
1418 task->thread.pfm_context = NULL;
1419
1420 return ret;
1421 }
1422
1423 static inline unsigned long
1424 pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
1425 {
1426 unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;
1427 unsigned long new_seed, old_seed = reg->seed, mask = reg->mask;
1428 extern unsigned long carta_random32 (unsigned long seed);
1429
1430 if (reg->flags & PFM_REGFL_RANDOM) {
1431 new_seed = carta_random32(old_seed);
1432 val -= (old_seed & mask); /* counter values are negative numbers! */
1433 if ((mask >> 32) != 0)
1434 /* construct a full 64-bit random value: */
1435 new_seed |= carta_random32(old_seed >> 32) << 32;
1436 reg->seed = new_seed;
1437 }
1438 reg->lval = val;
1439 return val;
1440 }
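/*
 * Worked example with hypothetical numbers: counters count upward toward
 * overflow, so reset values are large unsigned numbers (i.e. negative when
 * viewed as signed). With long_reset = -1000, PFM_REGFL_RANDOM set,
 * seed = 0x12 and mask = 0xff, the value loaded is -1000 - (0x12 & 0xff)
 * = -1018; over successive resets the subtracted term stays within
 * [0, mask], which randomizes the sampling period.
 */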
1441
1442 static void
1443 pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
1444 {
1445 unsigned long mask = ovfl_regs[0];
1446 unsigned long reset_others = 0UL;
1447 unsigned long val;
1448 int i, is_long_reset = (flag == PFM_PMD_LONG_RESET);
1449
1450 /*
1451 * now restore reset value on sampling overflowed counters
1452 */
1453 mask >>= PMU_FIRST_COUNTER;
1454 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
1455 if (mask & 0x1) {
1456 val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset);
1457 reset_others |= ctx->ctx_soft_pmds[i].reset_pmds[0];
1458
1459 DBprintk_ovfl(("[%d] %s reset soft_pmd[%d]=%lx\n", current->pid,
1460 is_long_reset ? "long" : "short", i, val));
1461
1462 /* upper part is ignored on rval */
1463 pfm_write_soft_counter(ctx, i, val);
1464 }
1465 }
1466
1467 /*
1468 * Now take care of resetting the other registers
1469 */
1470 for(i = 0; reset_others; i++, reset_others >>= 1) {
1471
1472 if ((reset_others & 0x1) == 0) continue;
1473
1474 val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset);
1475
1476 if (PMD_IS_COUNTING(i)) {
1477 pfm_write_soft_counter(ctx, i, val);
1478 } else {
1479 ia64_set_pmd(i, val);
1480 }
1481 DBprintk_ovfl(("[%d] %s reset_others pmd[%d]=%lx\n", current->pid,
1482 is_long_reset ? "long" : "short", i, val));
1483 }
1484 ia64_srlz_d();
1485 }
1486
1487 static int
1488 pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
1489 {
1490 struct thread_struct *th = &task->thread;
1491 pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
1492 unsigned long value, reset_pmds;
1493 unsigned int cnum, reg_flags, flags;
1494 int is_monitor, is_counting;
1495 int i, ret = -EINVAL;
1496 #define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))
1497
1498 /* we don't quite support this right now */
1499 if (task != current) return -EINVAL;
1500
1501 if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
1502
1503
1504 /* XXX: ctx locking may be required here */
1505
1506 for (i = 0; i < count; i++, req++) {
1507
1508 if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
1509
1510 cnum = tmp.reg_num;
1511 reg_flags = tmp.reg_flags;
1512 value = tmp.reg_value;
1513 reset_pmds = tmp.reg_reset_pmds[0];
1514 flags = 0;
1515
1516 is_counting = PMC_IS_COUNTING(cnum);
1517 is_monitor = PMC_IS_MONITOR(cnum);
1518
1519 /*
1520 * we reject all non-implemented PMCs as well
1521 * as attempts to modify PMC[0-3] which are used
1522 * as status registers by the PMU
1523 */
1524 if (!PMC_IS_IMPL(cnum) || cnum < 4) {
1525 DBprintk(("pmc[%u] is unimplemented or invalid\n", cnum));
1526 goto error;
1527 }
1528 /*
1529 * If the PMC is a monitor, then if the value is not the default:
1530 * - system-wide session: PMCx.pm=1 (privileged monitor)
1531 * - per-task : PMCx.pm=0 (user monitor)
1532 */
1533 if ((is_monitor || is_counting) && value != PMC_DFL_VAL(cnum) && PFM_CHECK_PMC_PM(ctx, cnum, value)) {
1534 DBprintk(("pmc%u pmc_pm=%ld fl_system=%d\n",
1535 cnum,
1536 PMC_PM(cnum, value),
1537 ctx->ctx_fl_system));
1538 goto error;
1539 }
1540
1541 if (is_counting) {
1542 pfm_monitor_t *p = (pfm_monitor_t *)&value;
1543 /*
1544 * enforce generation of overflow interrupt. Necessary on all
1545 * CPUs.
1546 */
1547 p->pmc_oi = 1;
1548
1549 if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
1550 /*
1551 * must have a target for the signal
1552 */
1553 if (ctx->ctx_notify_task == NULL) {
1554 DBprintk(("cannot set ovfl_notify: no notify_task\n"));
1555 goto error;
1556 }
1557 flags |= PFM_REGFL_OVFL_NOTIFY;
1558 }
1559
1560 if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
1561
1562 /* verify validity of reset_pmds */
1563 if ((reset_pmds & pmu_conf.impl_pmds[0]) != reset_pmds) {
1564 DBprintk(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
1565 goto error;
1566 }
1567 } else if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
1568 DBprintk(("cannot set ovfl_notify or random on pmc%u\n", cnum));
1569 goto error;
1570 }
1571
1572 /*
1573 * execute write checker, if any
1574 */
1575 if (PMC_WR_FUNC(cnum)) {
1576 ret = PMC_WR_FUNC(cnum)(task, cnum, &value, regs);
1577 if (ret) goto error;
1578 ret = -EINVAL;
1579 }
1580
1581 /*
1582 * no error on this register
1583 */
1584 PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
1585
1586 /*
1587 * update register return value, abort all if problem during copy.
1588 * we only modify the reg_flags field. no check mode is fine because
1589 * access has been verified upfront in sys_perfmonctl().
1590 *
1591 * If this fails, then the software state is not modified
1592 */
1593 if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT;
1594
1595 /*
1596 * Now we commit the changes to the software state
1597 */
1598
1599 /*
1600 * full flag update each time a register is programmed
1601 */
1602 ctx->ctx_soft_pmds[cnum].flags = flags;
1603
1604 if (is_counting) {
1605 ctx->ctx_soft_pmds[cnum].reset_pmds[0] = reset_pmds;
1606
1607 /* mark all PMDS to be accessed as used */
1608 CTX_USED_PMD(ctx, reset_pmds);
1609 }
1610
1611 /*
1612 * Needed in case the user does not initialize the equivalent
1613 * PMD. Clearing is done in reset_pmu() so there is no possible
1614 * leak here.
1615 */
1616 CTX_USED_PMD(ctx, pmu_conf.pmc_desc[cnum].dep_pmd[0]);
1617
1618 /*
1619 * keep a copy of the pmc, used for register reload
1620 */
1621 th->pmc[cnum] = value;
1622
1623 ia64_set_pmc(cnum, value);
1624
1625 DBprintk(("[%d] pmc[%u]=0x%lx flags=0x%x used_pmds=0x%lx\n",
1626 task->pid, cnum, value,
1627 ctx->ctx_soft_pmds[cnum].flags,
1628 ctx->ctx_used_pmds[0]));
1629
1630 }
1631
1632 return 0;
1633
1634 error:
1635 PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
1636
1637 if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT;
1638
1639 DBprintk(("[%d] pmc[%u]=0x%lx error %d\n", task->pid, cnum, value, ret));
1640
1641 return ret;
1642 }
1643
1644 static int
1645 pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
1646 {
1647 pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
1648 unsigned long value, hw_value;
1649 unsigned int cnum;
1650 int i;
1651 int ret = -EINVAL;
1652
1653 /* we don't quite support this right now */
1654 if (task != current) return -EINVAL;
1655
1656 /*
1657 * Cannot do anything before PMU is enabled
1658 */
1659 if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
1660
1661 /* XXX: ctx locking may be required here */
1662
1663
1664 for (i = 0; i < count; i++, req++) {
1665
1666 if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
1667
1668 cnum = tmp.reg_num;
1669 value = tmp.reg_value;
1670
1671 if (!PMD_IS_IMPL(cnum)) {
1672 DBprintk(("pmd[%u] is unimplemented or invalid\n", cnum));
1673 goto abort_mission;
1674 }
1675
1676 /*
1677 * execute write checker, if any
1678 */
1679 if (PMD_WR_FUNC(cnum)) {
1680 unsigned long v = value;
1681 ret = PMD_WR_FUNC(cnum)(task, cnum, &v, regs);
1682 if (ret) goto abort_mission;
1683 value = v;
1684 ret = -EINVAL;
1685 }
1686 hw_value = value;
1687 /*
1688 * no error on this register
1689 */
1690 PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
1691
1692 if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT;
1693
1694 /*
1695 * now commit changes to software state
1696 */
1697
1698 /* update virtualized (64bits) counter */
1699 if (PMD_IS_COUNTING(cnum)) {
1700 ctx->ctx_soft_pmds[cnum].lval = value;
1701 ctx->ctx_soft_pmds[cnum].val = value & ~pmu_conf.ovfl_val;
1702
1703 hw_value = value & pmu_conf.ovfl_val;
1704
1705 ctx->ctx_soft_pmds[cnum].long_reset = tmp.reg_long_reset;
1706 ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset;
1707
1708 ctx->ctx_soft_pmds[cnum].seed = tmp.reg_random_seed;
1709 ctx->ctx_soft_pmds[cnum].mask = tmp.reg_random_mask;
1710 }
1711
1712 /* keep track of what we use */
1713 CTX_USED_PMD(ctx, pmu_conf.pmd_desc[(cnum)].dep_pmd[0]);
1714
1715 /* mark this register as used as well */
1716 CTX_USED_PMD(ctx, RDEP(cnum));
1717
1718 /* writes to the unimplemented part are ignored, so this is safe */
1719 ia64_set_pmd(cnum, hw_value);
1720
1721 /* to go away */
1722 ia64_srlz_d();
1723
1724 DBprintk(("[%d] pmd[%u]: value=0x%lx hw_value=0x%lx soft_pmd=0x%lx short_reset=0x%lx "
1725 "long_reset=0x%lx hw_pmd=%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx psr=%d\n",
1726 task->pid, cnum,
1727 value, hw_value,
1728 ctx->ctx_soft_pmds[cnum].val,
1729 ctx->ctx_soft_pmds[cnum].short_reset,
1730 ctx->ctx_soft_pmds[cnum].long_reset,
1731 ia64_get_pmd(cnum) & pmu_conf.ovfl_val,
1732 PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
1733 ctx->ctx_used_pmds[0],
1734 ctx->ctx_soft_pmds[cnum].reset_pmds[0], ia64_psr(regs)->sp));
1735 }
1736
1737 return 0;
1738
1739 abort_mission:
1740 /*
1741 * for now, we have only one possibility for error
1742 */
1743 PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
1744
1745 /*
1746 	 * we change the return value to EFAULT when we cannot write the register return code
1747 	 * back to user space. The caller must first correct this error; a resubmission of the
1748 	 * request will then eventually yield the EINVAL.
1749 */
1750 if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT;
1751
1752 DBprintk(("[%d] pmc[%u]=0x%lx ret %d\n", task->pid, cnum, value, ret));
1753
1754 return ret;
1755 }
1756
1757 static int
1758 pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
1759 {
1760 struct thread_struct *th = &task->thread;
1761 unsigned long val, lval;
1762 pfarg_reg_t *req = (pfarg_reg_t *)arg;
1763 unsigned int cnum, reg_flags = 0;
1764 int i, ret = 0;
1765 #if __GNUC__ < 3
1766 int foo;
1767 #endif
1768
1769 if (!CTX_IS_ENABLED(ctx)) {
1770 DBprintk(("context for [%d] is disabled\n", task->pid));
1771 return -EINVAL;
1772 }
1773
1774 /*
1775 	 * XXX: MUST MAKE SURE WE DON'T HAVE ANY PENDING OVERFLOW BEFORE READING
1776 	 * This is required when the monitoring has been stopped by user or kernel.
1777 	 * If it is still going on, then that's fine because we are not guaranteed
1778 	 * to return an accurate value in this case.
1779 */
1780
1781 /* XXX: ctx locking may be required here */
1782
1783 /*
1784 * should we need to access the PMU, serialization is needed
1785 */
1786 ia64_srlz_d();
1787
1788 for (i = 0; i < count; i++, req++) {
1789
1790 #if __GNUC__ < 3
1791 foo = __get_user(cnum, &req->reg_num);
1792 if (foo) return -EFAULT;
1793 foo = __get_user(reg_flags, &req->reg_flags);
1794 if (foo) return -EFAULT;
1795 #else
1796 if (__get_user(cnum, &req->reg_num)) return -EFAULT;
1797 if (__get_user(reg_flags, &req->reg_flags)) return -EFAULT;
1798 #endif
1799 lval = 0UL;
1800
1801 if (!PMD_IS_IMPL(cnum)) goto abort_mission;
1802 /*
1803 		 * we can only read the registers that we use. That includes
1804 		 * the ones we explicitly initialize AND the ones we want included
1805 		 * in the sampling buffer (smpl_regs).
1806 *
1807 * Having this restriction allows optimization in the ctxsw routine
1808 * without compromising security (leaks)
1809 */
1810 if (!CTX_IS_USED_PMD(ctx, cnum)) goto abort_mission;
1811
1812 /*
1813 		 * we can access the registers directly only when the task
1814 		 * is the OWNER of the local PMU. In SMP, this can
1815 		 * only happen when task == current. It can also
1816 		 * happen when task != current, but
1817 		 * only in UP mode.
1818 */
1819 if (task == PMU_OWNER()) {
1820 val = ia64_get_pmd(cnum);
1821 DBprintk(("reading pmd[%u]=0x%lx from hw\n", cnum, val));
1822 } else {
1823 /* context has been saved */
1824 val = th->pmd[cnum];
1825 }
1826
1827 if (PMD_IS_COUNTING(cnum)) {
1828 /*
1829 * XXX: need to check for overflow
1830 */
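			/*
			 * reconstruct the 64-bit virtual count: the low bits come from
			 * the hardware (or saved) PMD, the upper bits from the software
			 * state accumulated in ctx_soft_pmds[].val by the overflow handler.
			 */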
1831 val &= pmu_conf.ovfl_val;
1832 val += ctx->ctx_soft_pmds[cnum].val;
1833
1834 lval = ctx->ctx_soft_pmds[cnum].lval;
1835 }
1836
1837 /*
1838 * execute read checker, if any
1839 */
1840 if (PMD_RD_FUNC(cnum)) {
1841 unsigned long v = val;
1842 ret = PMD_RD_FUNC(cnum)(task, cnum, &v, regs);
1843 val = v;
1844 }
1845
1846 PFM_REG_RETFLAG_SET(reg_flags, ret);
1847
1848 DBprintk(("read pmd[%u] ret=%d value=0x%lx pmc=0x%lx\n",
1849 cnum, ret, val, ia64_get_pmc(cnum)));
1850
1851 /*
1852 * update register return value, abort all if problem during copy.
1853 * we only modify the reg_flags field. no check mode is fine because
1854 * access has been verified upfront in sys_perfmonctl().
1855 */
1856 if (__put_user(cnum, &req->reg_num)) return -EFAULT;
1857 if (__put_user(val, &req->reg_value)) return -EFAULT;
1858 if (__put_user(reg_flags, &req->reg_flags)) return -EFAULT;
1859 if (__put_user(lval, &req->reg_last_reset_value)) return -EFAULT;
1860 }
1861
1862 return 0;
1863
1864 abort_mission:
1865 PFM_REG_RETFLAG_SET(reg_flags, PFM_REG_RETFL_EINVAL);
1866 /*
1867 * XXX: if this fails, we stick with the original failure, flag not updated!
1868 */
1869 __put_user(reg_flags, &req->reg_flags);
1870
1871 return -EINVAL;
1872 }
1873
1874 #ifdef PFM_PMU_USES_DBR
1875 /*
1876  * Only call this function when a process is trying to
1877 * write the debug registers (reading is always allowed)
1878 */
1879 int
1880 pfm_use_debug_registers(struct task_struct *task)
1881 {
1882 pfm_context_t *ctx = task->thread.pfm_context;
1883 int ret = 0;
1884
1885 DBprintk(("called for [%d]\n", task->pid));
1886
1887 /*
1888 * do it only once
1889 */
1890 if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;
1891
1892 /*
1893 * Even on SMP, we do not need to use an atomic here because
1894 * the only way in is via ptrace() and this is possible only when the
1895 * process is stopped. Even in the case where the ctxsw out is not totally
1896 * completed by the time we come here, there is no way the 'stopped' process
1897 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
1898 * So this is always safe.
1899 */
1900 if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;
1901
1902 LOCK_PFS();
1903
1904 /*
1905 * We cannot allow setting breakpoints when system wide monitoring
1906 * sessions are using the debug registers.
1907 */
1908 	if (pfm_sessions.pfs_sys_use_dbregs > 0)
1909 ret = -1;
1910 else
1911 pfm_sessions.pfs_ptrace_use_dbregs++;
1912
1913 DBprintk(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n",
1914 pfm_sessions.pfs_ptrace_use_dbregs,
1915 pfm_sessions.pfs_sys_use_dbregs,
1916 task->pid, ret));
1917
1918 UNLOCK_PFS();
1919
1920 return ret;
1921 }
1922
1923 /*
1924 * This function is called for every task that exits with the
1925 * IA64_THREAD_DBG_VALID set. This indicates a task which was
1926 * able to use the debug registers for debugging purposes via
1927 * ptrace(). Therefore we know it was not using them for
1928  * performance monitoring, so we only decrement the number
1929 * of "ptraced" debug register users to keep the count up to date
1930 */
1931 int
1932 pfm_release_debug_registers(struct task_struct *task)
1933 {
1934 int ret;
1935
1936 LOCK_PFS();
1937 if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
1938 printk(KERN_DEBUG "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n",
1939 task->pid);
1940 ret = -1;
1941 } else {
1942 pfm_sessions.pfs_ptrace_use_dbregs--;
1943 ret = 0;
1944 }
1945 UNLOCK_PFS();
1946
1947 return ret;
1948 }
1949 #else /* !PFM_PMU_USES_DBR */
1950 /*
1951  * in case the PMU does not use the debug registers, these two functions are nops.
1952 * The first function is called from arch/ia64/kernel/ptrace.c.
1953 * The second function is called from arch/ia64/kernel/process.c.
1954 */
1955 int
1956 pfm_use_debug_registers(struct task_struct *task)
1957 {
1958 return 0;
1959 }
1960
1961 int
1962 pfm_release_debug_registers(struct task_struct *task)
1963 {
1964 return 0;
1965 }
1966 #endif /* PFM_PMU_USES_DBR */
1967
1968 static int
1969 pfm_restart(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
1970 struct pt_regs *regs)
1971 {
1972 void *sem = &ctx->ctx_restart_sem;
1973
1974 /*
1975 * Cannot do anything before PMU is enabled
1976 */
1977 if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
1978
1979 if (task == current) {
1980 DBprintk(("restarting self %d frozen=%d ovfl_regs=0x%lx\n",
1981 task->pid,
1982 ctx->ctx_fl_frozen,
1983 ctx->ctx_ovfl_regs[0]));
1984
1985 pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
1986
1987 ctx->ctx_ovfl_regs[0] = 0UL;
1988
1989 /*
1990 * We ignore block/don't block because we never block
1991 * for a self-monitoring process.
1992 */
1993 ctx->ctx_fl_frozen = 0;
1994
1995 if (CTX_HAS_SMPL(ctx)) {
1996 ctx->ctx_psb->psb_hdr->hdr_count = 0;
1997 ctx->ctx_psb->psb_index = 0;
1998 }
1999
2000 /* simply unfreeze */
2001 pfm_unfreeze_pmu();
2002
2003 return 0;
2004 }
2005 /* restart on another task */
2006
2007 /*
2008 * if blocking, then post the semaphore.
2009 * if non-blocking, then we ensure that the task will go into
2010 * pfm_overflow_must_block() before returning to user mode.
2011 	 * We cannot explicitly reset another task, it MUST always
2012 * be done by the task itself. This works for system wide because
2013 * the tool that is controlling the session is doing "self-monitoring".
2014 *
2015 * XXX: what if the task never goes back to user?
2016 *
2017 */
2018 if (CTX_OVFL_NOBLOCK(ctx) == 0) {
2019 DBprintk(("unblocking %d \n", task->pid));
2020 up(sem);
2021 } else {
2022 task->thread.pfm_ovfl_block_reset = 1;
2023 }
2024 #if 0
2025 /*
2026 	 * in non-blocking mode, it's just a matter
2027 	 * of resetting the sampling buffer (if any) index. The PMU
2028 * is already active.
2029 */
2030
2031 /*
2032 * must reset the header count first
2033 */
2034 if (CTX_HAS_SMPL(ctx)) {
2035 DBprintk(("resetting sampling indexes for %d \n", task->pid));
2036 ctx->ctx_psb->psb_hdr->hdr_count = 0;
2037 ctx->ctx_psb->psb_index = 0;
2038 }
2039 #endif
2040 return 0;
2041 }
2042
2043 static int
2044 pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2045 struct pt_regs *regs)
2046 {
2047 /* we don't quite support this right now */
2048 if (task != current) return -EINVAL;
2049
2050 /*
2051 * Cannot do anything before PMU is enabled
2052 */
2053 if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2054
2055 DBprintk(("[%d] fl_system=%d owner=%p current=%p\n",
2056 current->pid,
2057 ctx->ctx_fl_system, PMU_OWNER(),
2058 current));
2059
2060 /* simply stop monitoring but not the PMU */
2061 if (ctx->ctx_fl_system) {
2062
2063 /* disable dcr pp */
2064 ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
2065
2066 /* stop monitoring */
2067 pfm_clear_psr_pp();
2068 ia64_srlz_i();
2069
2070 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
2071
2072 ia64_psr(regs)->pp = 0;
2073
2074 } else {
2075
2076 /* stop monitoring */
2077 pfm_clear_psr_up();
2078 ia64_srlz_i();
2079
2080 /*
2081 * clear user level psr.up
2082 */
2083 ia64_psr(regs)->up = 0;
2084 }
2085 return 0;
2086 }
2087
2088 static int
2089 pfm_disable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2090 struct pt_regs *regs)
2091 {
2092 /* we don't quite support this right now */
2093 if (task != current) return -EINVAL;
2094
2095 if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2096
2097 /*
2098 * stop monitoring, freeze PMU, and save state in context
2099 * this call will clear IA64_THREAD_PM_VALID for per-task sessions.
2100 */
2101 pfm_flush_regs(task);
2102
2103 if (ctx->ctx_fl_system) {
2104 ia64_psr(regs)->pp = 0;
2105 } else {
2106 ia64_psr(regs)->up = 0;
2107 }
2108 /*
2109 * goes back to default behavior: no user level control
2110 * no need to change live psr.sp because useless at the kernel level
2111 */
2112 ia64_psr(regs)->sp = 1;
2113
2114 DBprintk(("enabling psr.sp for [%d]\n", current->pid));
2115
2116 ctx->ctx_flags.state = PFM_CTX_DISABLED;
2117
2118 return 0;
2119 }
2120
2121 static int
2122 pfm_context_destroy(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2123 struct pt_regs *regs)
2124 {
2125 /* we don't quite support this right now */
2126 if (task != current) return -EINVAL;
2127
2128 /*
2129 * if context was never enabled, then there is not much
2130 * to do
2131 */
2132 if (!CTX_IS_ENABLED(ctx)) goto skipped_stop;
2133
2134 /*
2135 * Disable context: stop monitoring, flush regs to software state (useless here),
2136 * and freeze PMU
2137 *
2138 * The IA64_THREAD_PM_VALID is cleared by pfm_flush_regs() called from pfm_disable()
2139 */
2140 pfm_disable(task, ctx, arg, count, regs);
2141
2142 if (ctx->ctx_fl_system) {
2143 ia64_psr(regs)->pp = 0;
2144 } else {
2145 ia64_psr(regs)->up = 0;
2146 }
2147
2148 skipped_stop:
2149 /*
2150 * remove sampling buffer mapping, if any
2151 */
2152 if (ctx->ctx_smpl_vaddr) {
2153 pfm_remove_smpl_mapping(task);
2154 ctx->ctx_smpl_vaddr = 0UL;
2155 }
2156 /* now free context and related state */
2157 pfm_context_exit(task);
2158
2159 return 0;
2160 }
2161
2162 /*
2163 * does nothing at the moment
2164 */
2165 static int
2166 pfm_context_unprotect(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2167 struct pt_regs *regs)
2168 {
2169 return 0;
2170 }
2171
2172 static int
2173 pfm_protect_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2174 struct pt_regs *regs)
2175 {
2176 /*
2177 * from now on, only the creator of the context has access to it
2178 */
2179 ctx->ctx_fl_protected = 1;
2180
2181 /*
2182 * reinforce secure monitoring: cannot toggle psr.up
2183 */
2184 if (ctx->ctx_fl_unsecure == 0) ia64_psr(regs)->sp = 1;
2185
2186 DBprintk(("[%d] protected psr.sp=%d\n", task->pid, ia64_psr(regs)->sp));
2187
2188 return 0;
2189 }
2190
2191 static int
2192 pfm_debug(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2193 struct pt_regs *regs)
2194 {
2195 unsigned int mode = *(unsigned int *)arg;
2196
2197 pfm_sysctl.debug = mode == 0 ? 0 : 1;
2198
2199 printk(KERN_INFO "perfmon debugging %s\n", pfm_sysctl.debug ? "on" : "off");
2200
2201 return 0;
2202 }
2203
2204 #ifdef PFM_PMU_USES_DBR
2205
2206 typedef struct {
2207 unsigned long ibr_mask:56;
2208 unsigned long ibr_plm:4;
2209 unsigned long ibr_ig:3;
2210 unsigned long ibr_x:1;
2211 } ibr_mask_reg_t;
2212
2213 typedef struct {
2214 unsigned long dbr_mask:56;
2215 unsigned long dbr_plm:4;
2216 unsigned long dbr_ig:2;
2217 unsigned long dbr_w:1;
2218 unsigned long dbr_r:1;
2219 } dbr_mask_reg_t;
2220
2221 typedef union {
2222 unsigned long val;
2223 ibr_mask_reg_t ibr;
2224 dbr_mask_reg_t dbr;
2225 } dbreg_t;
2226
2227
2228 static int
2229 pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, struct pt_regs *regs)
2230 {
2231 struct thread_struct *thread = &task->thread;
2232 pfm_context_t *ctx = task->thread.pfm_context;
2233 pfarg_dbreg_t tmp, *req = (pfarg_dbreg_t *)arg;
2234 dbreg_t dbreg;
2235 unsigned int rnum;
2236 int first_time;
2237 int i, ret = 0;
2238
2239 /*
2240 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
2241 * ensuring that no real breakpoint can be installed via this call.
2242 */
2243
2244 first_time = ctx->ctx_fl_using_dbreg == 0;
2245
2246 /*
2247 * check for debug registers in system wide mode
2248 *
2249 */
2250 LOCK_PFS();
2251 if (ctx->ctx_fl_system && first_time) {
2252 if (pfm_sessions.pfs_ptrace_use_dbregs)
2253 ret = -EBUSY;
2254 else
2255 pfm_sessions.pfs_sys_use_dbregs++;
2256 }
2257 UNLOCK_PFS();
2258
2259 if (ret != 0) return ret;
2260
2261 if (ctx->ctx_fl_system) {
2262 /* we mark ourselves as owner of the debug registers */
2263 ctx->ctx_fl_using_dbreg = 1;
2264 DBprintk(("system-wide setting fl_using_dbreg for [%d]\n", task->pid));
2265 } else if (first_time) {
2266 		ret = -EBUSY;
2267 if ((thread->flags & IA64_THREAD_DBG_VALID) != 0) {
2268 DBprintk(("debug registers already in use for [%d]\n", task->pid));
2269 goto abort_mission;
2270 }
2271 /* we mark ourselves as owner of the debug registers */
2272 ctx->ctx_fl_using_dbreg = 1;
2273
2274 DBprintk(("setting fl_using_dbreg for [%d]\n", task->pid));
2275 /*
2276 * Given debug registers cannot be used for both debugging
2277 * and performance monitoring at the same time, we reuse
2278 * the storage area to save and restore the registers on ctxsw.
2279 */
2280 memset(task->thread.dbr, 0, sizeof(task->thread.dbr));
2281 memset(task->thread.ibr, 0, sizeof(task->thread.ibr));
2282 }
2283
2284 if (first_time) {
2285 DBprintk(("[%d] clearing ibrs,dbrs\n", task->pid));
2286 /*
2287 * clear hardware registers to make sure we don't
2288 * pick up stale state.
2289 *
2290 * for a system wide session, we do not use
2291 * thread.dbr, thread.ibr because this process
2292 * never leaves the current CPU and the state
2293 * is shared by all processes running on it
2294 */
2295 for (i=0; i < pmu_conf.num_ibrs; i++) {
2296 ia64_set_ibr(i, 0UL);
2297 }
2298 ia64_srlz_i();
2299 for (i=0; i < pmu_conf.num_dbrs; i++) {
2300 ia64_set_dbr(i, 0UL);
2301 }
2302 ia64_srlz_d();
2303 }
2304
2305 ret = -EFAULT;
2306
2307 /*
2308 * Now install the values into the registers
2309 */
2310 for (i = 0; i < count; i++, req++) {
2311
2312 if (__copy_from_user(&tmp, req, sizeof(tmp))) goto abort_mission;
2313
2314 rnum = tmp.dbreg_num;
2315 dbreg.val = tmp.dbreg_value;
2316
2317 ret = -EINVAL;
2318
2319 if ((mode == 0 && !IBR_IS_IMPL(rnum)) || ((mode == 1) && !DBR_IS_IMPL(rnum))) {
2320 DBprintk(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
2321 rnum, dbreg.val, mode, i, count));
2322
2323 goto abort_mission;
2324 }
2325
2326 /*
2327 * make sure we do not install enabled breakpoint
2328 */
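		/*
		 * Note: on IA-64 the odd-numbered IBR/DBR registers carry the mask,
		 * plm and enable bits (ibr.x, dbr.r/dbr.w) while the even-numbered
		 * ones hold the address. Clearing the enable bits on the odd
		 * registers is therefore enough to guarantee that no live
		 * breakpoint can be installed through this path.
		 */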
2329 if (rnum & 0x1) {
2330 if (mode == 0)
2331 dbreg.ibr.ibr_x = 0;
2332 else
2333 dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
2334 }
2335
2336 /*
2337 * clear return flags and copy back to user
2338 *
2339 * XXX: fix once EAGAIN is implemented
2340 */
2341 ret = -EFAULT;
2342
2343 PFM_REG_RETFLAG_SET(tmp.dbreg_flags, 0);
2344
2345 if (__copy_to_user(req, &tmp, sizeof(tmp))) goto abort_mission;
2346
2347 /*
2348 * Debug registers, just like PMC, can only be modified
2349 * by a kernel call. Moreover, perfmon() access to those
2350 * registers are centralized in this routine. The hardware
2351 * does not modify the value of these registers, therefore,
2352 * if we save them as they are written, we can avoid having
2353 * to save them on context switch out. This is made possible
2354 * by the fact that when perfmon uses debug registers, ptrace()
2355 * won't be able to modify them concurrently.
2356 */
2357 if (mode == 0) {
2358 CTX_USED_IBR(ctx, rnum);
2359
2360 ia64_set_ibr(rnum, dbreg.val);
2361 ia64_srlz_i();
2362
2363 thread->ibr[rnum] = dbreg.val;
2364
2365 DBprintk(("write ibr%u=0x%lx used_ibrs=0x%lx\n", rnum, dbreg.val, ctx->ctx_used_ibrs[0]));
2366 } else {
2367 CTX_USED_DBR(ctx, rnum);
2368
2369 ia64_set_dbr(rnum, dbreg.val);
2370 ia64_srlz_d();
2371
2372 thread->dbr[rnum] = dbreg.val;
2373
2374 DBprintk(("write dbr%u=0x%lx used_dbrs=0x%lx\n", rnum, dbreg.val, ctx->ctx_used_dbrs[0]));
2375 }
2376 }
2377
2378 return 0;
2379
2380 abort_mission:
2381 /*
2382 * in case it was our first attempt, we undo the global modifications
2383 */
2384 if (first_time) {
2385 LOCK_PFS();
2386 if (ctx->ctx_fl_system) {
2387 pfm_sessions.pfs_sys_use_dbregs--;
2388 }
2389 UNLOCK_PFS();
2390 ctx->ctx_fl_using_dbreg = 0;
2391 }
2392 /*
2393 * install error return flag
2394 */
2395 if (ret != -EFAULT) {
2396 /*
2397 * XXX: for now we can only come here on EINVAL
2398 */
2399 PFM_REG_RETFLAG_SET(tmp.dbreg_flags, PFM_REG_RETFL_EINVAL);
2400 if (__put_user(tmp.dbreg_flags, &req->dbreg_flags)) ret = -EFAULT;
2401 }
2402 return ret;
2403 }
2404
2405 static int
2406 pfm_write_ibrs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2407 struct pt_regs *regs)
2408 {
2409 /* we don't quite support this right now */
2410 if (task != current) return -EINVAL;
2411
2412 if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2413
2414 return pfm_write_ibr_dbr(0, task, arg, count, regs);
2415 }
2416
2417 static int
2418 pfm_write_dbrs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2419 struct pt_regs *regs)
2420 {
2421 /* we don't quite support this right now */
2422 if (task != current) return -EINVAL;
2423
2424 if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2425
2426 return pfm_write_ibr_dbr(1, task, arg, count, regs);
2427 }
2428
2429 #endif /* PFM_PMU_USES_DBR */
2430
2431 static int
2432 pfm_get_features(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
2433 {
2434 pfarg_features_t tmp;
2435
2436 memset(&tmp, 0, sizeof(tmp));
2437
2438 tmp.ft_version = PFM_VERSION;
2439 tmp.ft_smpl_version = PFM_SMPL_VERSION;
2440
2441 if (__copy_to_user(arg, &tmp, sizeof(tmp))) return -EFAULT;
2442
2443 return 0;
2444 }
2445
2446 static int
2447 pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2448 struct pt_regs *regs)
2449 {
2450 /* we don't quite support this right now */
2451 if (task != current) return -EINVAL;
2452
2453 /*
2454 * Cannot do anything before PMU is enabled
2455 */
2456 if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2457
2458 DBprintk(("[%d] fl_system=%d owner=%p current=%p\n",
2459 current->pid,
2460 ctx->ctx_fl_system, PMU_OWNER(),
2461 current));
2462
2463 if (PMU_OWNER() != task) {
2464 printk(KERN_DEBUG "perfmon: pfm_start task [%d] not pmu owner\n", task->pid);
2465 return -EINVAL;
2466 }
2467
2468 if (ctx->ctx_fl_system) {
2469
2470 PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
2471
2472 /* set user level psr.pp */
2473 ia64_psr(regs)->pp = 1;
2474
2475 /* start monitoring at kernel level */
2476 pfm_set_psr_pp();
2477
2478 /* enable dcr pp */
2479 ia64_set_dcr(ia64_get_dcr()|IA64_DCR_PP);
2480
2481 ia64_srlz_i();
2482
2483 } else {
2484 if ((task->thread.flags & IA64_THREAD_PM_VALID) == 0) {
2485 printk(KERN_DEBUG "perfmon: pfm_start task flag not set for [%d]\n",
2486 task->pid);
2487 return -EINVAL;
2488 }
2489 /* set user level psr.up */
2490 ia64_psr(regs)->up = 1;
2491
2492 /* start monitoring at kernel level */
2493 pfm_set_psr_up();
2494
2495 ia64_srlz_i();
2496 }
2497
2498 return 0;
2499 }
2500
2501 static int
2502 pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2503 struct pt_regs *regs)
2504 {
2505 /* we don't quite support this right now */
2506 if (task != current) {
2507 DBprintk(("task [%d] != current [%d]\n", task->pid, current->pid));
2508 return -EINVAL;
2509 }
2510
2511 #ifndef CONFIG_SMP
2512 if (ctx->ctx_fl_system == 0 && PMU_OWNER() && PMU_OWNER() != current)
2513 pfm_lazy_save_regs(PMU_OWNER());
2514 #endif
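	/*
	 * on UP the PMU state of the previous owner is saved lazily, so another
	 * task may still have live state in the hardware; push it out first,
	 * otherwise pfm_reset_pmu() below would wipe state that was never saved.
	 */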
2515
2516 /* reset all registers to stable quiet state */
2517 pfm_reset_pmu(task);
2518
2519 /* make sure nothing starts */
2520 if (ctx->ctx_fl_system) {
2521 ia64_psr(regs)->pp = 0;
2522 ia64_psr(regs)->up = 0; /* just to make sure! */
2523
2524 /* make sure monitoring is stopped */
2525 pfm_clear_psr_pp();
2526 ia64_srlz_i();
2527
2528 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
2529 PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
2530 if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
2531 } else {
2532 /*
2533 * needed in case the task was a passive task during
2534 * a system wide session and now wants to have its own
2535 * session
2536 */
2537 ia64_psr(regs)->pp = 0; /* just to make sure! */
2538 ia64_psr(regs)->up = 0;
2539
2540 /* make sure monitoring is stopped */
2541 pfm_clear_psr_up();
2542 ia64_srlz_i();
2543
2544 DBprintk(("clearing psr.sp for [%d]\n", current->pid));
2545
2546 /* allow user level control */
2547 ia64_psr(regs)->sp = 0;
2548
2549 /* PMU state will be saved/restored on ctxsw */
2550 task->thread.flags |= IA64_THREAD_PM_VALID;
2551 }
2552
2553 SET_PMU_OWNER(task);
2554
2555 ctx->ctx_flags.state = PFM_CTX_ENABLED;
2556 SET_LAST_CPU(ctx, smp_processor_id());
2557 INC_ACTIVATION();
2558 SET_ACTIVATION(ctx);
2559
2560 /* simply unfreeze */
2561 pfm_unfreeze_pmu();
2562
2563 return 0;
2564 }
2565
2566 static int
2567 pfm_get_pmc_reset(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2568 struct pt_regs *regs)
2569 {
2570 pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
2571 unsigned int cnum;
2572 int i, ret = -EINVAL;
2573
2574 for (i = 0; i < count; i++, req++) {
2575
2576 if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
2577
2578 cnum = tmp.reg_num;
2579
2580 if (!PMC_IS_IMPL(cnum)) goto abort_mission;
2581
2582 tmp.reg_value = PMC_DFL_VAL(cnum);
2583
2584 PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
2585
2586 DBprintk(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, tmp.reg_value));
2587
2588 if (__copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
2589 }
2590 return 0;
2591 abort_mission:
2592 PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
2593 if (__copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT;
2594
2595 return ret;
2596 }
2597
2598 /*
2599  * functions MUST be listed in the increasing order of their index (see perfmon.h)
2600 */
2601 static pfm_cmd_desc_t pfm_cmd_tab[]={
2602 /* 0 */{ NULL, 0, 0, 0}, /* not used */
2603 /* 1 */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2604 /* 2 */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2605 /* 3 */{ pfm_read_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2606 /* 4 */{ pfm_stop, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2607 /* 5 */{ pfm_start, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2608 /* 6 */{ pfm_enable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2609 /* 7 */{ pfm_disable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2610 /* 8 */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_RW, 1, sizeof(pfarg_context_t)},
2611 /* 9 */{ pfm_context_destroy, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2612 /* 10 */{ pfm_restart, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_NOCHK, 0, 0},
2613 /* 11 */{ pfm_protect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2614 /* 12 */{ pfm_get_features, PFM_CMD_ARG_RW, 0, 0},
2615 /* 13 */{ pfm_debug, 0, 1, sizeof(unsigned int)},
2616 /* 14 */{ pfm_context_unprotect, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2617 /* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2618 /* 16 */{ NULL, 0, 0, 0}, /* not used */
2619 /* 17 */{ NULL, 0, 0, 0}, /* not used */
2620 /* 18 */{ NULL, 0, 0, 0}, /* not used */
2621 /* 19 */{ NULL, 0, 0, 0}, /* not used */
2622 /* 20 */{ NULL, 0, 0, 0}, /* not used */
2623 /* 21 */{ NULL, 0, 0, 0}, /* not used */
2624 /* 22 */{ NULL, 0, 0, 0}, /* not used */
2625 /* 23 */{ NULL, 0, 0, 0}, /* not used */
2626 /* 24 */{ NULL, 0, 0, 0}, /* not used */
2627 /* 25 */{ NULL, 0, 0, 0}, /* not used */
2628 /* 26 */{ NULL, 0, 0, 0}, /* not used */
2629 /* 27 */{ NULL, 0, 0, 0}, /* not used */
2630 /* 28 */{ NULL, 0, 0, 0}, /* not used */
2631 /* 29 */{ NULL, 0, 0, 0}, /* not used */
2632 /* 30 */{ NULL, 0, 0, 0}, /* not used */
2633 /* 31 */{ NULL, 0, 0, 0}, /* not used */
2634 #ifdef PFM_PMU_USES_DBR
2635 /* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)},
2636 /* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}
2637 #endif
2638 };
2639 #define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
2640
2641 static int
2642 check_task_state(struct task_struct *task)
2643 {
2644 int ret = 0;
2645 #ifdef CONFIG_SMP
2646 /* We must wait until the state has been completely
2647 	 * saved. There can be situations where the reader arrives
2648 	 * after the task is marked as STOPPED but before pfm_save_regs()
2649 * is completed.
2650 */
2651 for (;;) {
2652
2653 task_lock(task);
2654 DBprintk((" [%d] state=%ld\n", task->pid, task->state));
2655 if (!task_has_cpu(task)) break;
2656 task_unlock(task);
2657
2658 do {
2659 if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
2660 DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
2661 return -EBUSY;
2662 }
2663 barrier();
2664 cpu_relax();
2665 } while (task_has_cpu(task));
2666 }
2667 task_unlock(task);
2668 #else
2669 if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
2670 DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
2671 ret = -EBUSY;
2672 }
2673 #endif
2674 return ret;
2675 }
2676
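/*
 * Illustrative user-level usage (sketch only, not part of this file; the
 * command names come from <asm/perfmon.h> and perfmonctl() is the usual
 * user-level wrapper for this system call):
 *
 *	pfarg_reg_t pd;
 *
 *	memset(&pd, 0, sizeof(pd));
 *	pd.reg_num   = 4;	   a counting PMD
 *	pd.reg_value = 0;
 *	perfmonctl(getpid(), PFM_WRITE_PMDS, &pd, 1);
 *
 * Every command goes through the dispatch below: the command index is
 * validated, the argument count and user-space access are checked, and the
 * corresponding entry of pfm_cmd_tab[] is invoked.
 */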
2677 asmlinkage long
2678 sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6, long arg7,
2679 long arg8, long stack)
2680 {
2681 struct pt_regs *regs = (struct pt_regs *)&stack;
2682 struct task_struct *task = current;
2683 pfm_context_t *ctx;
2684 size_t sz;
2685 long ret;
2686 int narg;
2687
2688 /*
2689 * reject any call if perfmon was disabled at initialization time
2690 */
2691 if (PFM_IS_DISABLED()) return -ENOSYS;
2692
2693 DBprintk(("cmd=%d idx=%d valid=%d narg=0x%x\n", cmd, PFM_CMD_IDX(cmd),
2694 PFM_CMD_IS_VALID(cmd), PFM_CMD_NARG(cmd)));
2695
2696 if (PFM_CMD_IS_VALID(cmd) == 0) return -EINVAL;
2697
2698 	/* ignore arguments when command has none */
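	/*
	 * commands taking a variable number of arguments (PFM_CMD_ARG_MANY) must
	 * pass count > 0; commands with a fixed arity must pass exactly narg.
	 */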
2699 narg = PFM_CMD_NARG(cmd);
2700 if ((narg == PFM_CMD_ARG_MANY && count == 0) || (narg > 0 && narg != count)) return -EINVAL;
2701
2702 sz = PFM_CMD_ARG_SIZE(cmd);
2703
2704 if (PFM_CMD_READ_ARG(cmd) && !access_ok(VERIFY_READ, arg, sz*count)) return -EFAULT;
2705
2706 if (PFM_CMD_RW_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT;
2707
2708 if (PFM_CMD_USE_PID(cmd)) {
2709 /*
2710 * XXX: may need to fine tune this one
2711 */
2712 if (pid < 2) return -EPERM;
2713
2714 if (pid != current->pid) {
2715
2716 ret = -ESRCH;
2717
2718 read_lock(&tasklist_lock);
2719
2720 task = find_task_by_pid(pid);
2721
2722 if (!task) goto abort_call;
2723
2724 ret = -EPERM;
2725
2726 if (pfm_bad_permissions(task)) goto abort_call;
2727
2728 if (PFM_CMD_CHK(cmd)) {
2729 ret = check_task_state(task);
2730 if (ret != 0) {
2731 DBprintk(("check_task_state=%ld for [%d]\n", ret, task->pid));
2732 goto abort_call;
2733 }
2734 }
2735 }
2736 }
2737
2738 ctx = PFM_GET_CTX(task);
2739
2740 if (PFM_CMD_USE_CTX(cmd)) {
2741 ret = -EINVAL;
2742 if (ctx == NULL) {
2743 DBprintk(("no context for task %d\n", task->pid));
2744 goto abort_call;
2745 }
2746
2747
2748 ret = -EPERM;
2749 /*
2750 * we only grant access to the context if:
2751 * - the caller is the creator of the context (ctx_owner)
2752 	 * OR - the context is attached to the caller AND the context is NOT
2753 	 *      in protected mode
2754 */
2755 if (ctx->ctx_owner != current && (ctx->ctx_fl_protected || task != current)) {
2756 DBprintk(("context protected, no access for [%d]\n", task->pid));
2757 goto abort_call;
2758 }
2759 }
2760
2761 ret = (*pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func)(task, ctx, arg, count, regs);
2762
2763 abort_call:
2764 if (task != current) read_unlock(&tasklist_lock);
2765
2766 return ret;
2767 }
2768
2769 void asmlinkage
2770 pfm_ovfl_block_reset(u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
2771 u64 arg6, u64 arg7, long info)
2772 {
2773 	struct thread_struct *th = &current->thread;
2774 pfm_context_t *ctx = current->thread.pfm_context;
2775 int ret;
2776
2777 /*
2778 * clear the flag, to make sure we won't get here
2779 * again
2780 */
2781 th->pfm_ovfl_block_reset = 0;
2782
2783 /*
2784 * do some sanity checks first
2785 */
2786 if (!ctx) {
2787 printk(KERN_DEBUG "perfmon: [%d] has no PFM context\n", current->pid);
2788 return;
2789 }
2790
2791 if (CTX_OVFL_NOBLOCK(ctx)) goto non_blocking;
2792
2793 DBprintk(("[%d] before sleeping\n", current->pid));
2794
2795 /*
2796 * may go through without blocking on SMP systems
2797 * if restart has been received already by the time we call down()
2798 */
2799 ret = down_interruptible(&ctx->ctx_restart_sem);
2800
2801 DBprintk(("[%d] after sleeping ret=%d\n", current->pid, ret));
2802
2803 /*
2804 * in case of interruption of down() we don't restart anything
2805 */
2806 if (ret >= 0) {
2807
2808 non_blocking:
2809 /* we reactivate on context switch */
2810 ctx->ctx_fl_frozen = 0;
2811 /*
2812 * the ovfl_sem is cleared by the restart task and this is safe because we always
2813 * use the local reference
2814 */
2815
2816 pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
2817
2818 ctx->ctx_ovfl_regs[0] = 0UL;
2819
2820 /*
2821 * Unlock sampling buffer and reset index atomically
2822 * XXX: not really needed when blocking
2823 */
2824 if (CTX_HAS_SMPL(ctx)) {
2825 ctx->ctx_psb->psb_hdr->hdr_count = 0;
2826 ctx->ctx_psb->psb_index = 0;
2827 }
2828
2829 pfm_unfreeze_pmu();
2830
2831 /* state restored, can go back to work (user mode) */
2832 }
2833 }
2834
2835 /*
2836  * This function will record an entry in the sampling buffer if it is not full already.
2837 * Input:
2838 * ovfl_mask: mask of overflowed PMD. MUST NEVER be 0.
2839 * Return:
2840 * 0 : buffer is not full (did not BECOME full: still space or was already full)
2841 * 1 : buffer is full (recorded the last entry)
2842 */
2843 static int
2844 pfm_record_sample(struct task_struct *task, pfm_context_t *ctx, unsigned long ovfl_mask, struct pt_regs *regs)
2845 {
2846 pfm_smpl_buffer_desc_t *psb = ctx->ctx_psb;
2847 unsigned long *e, m, idx;
2848 perfmon_smpl_entry_t *h;
2849 int j;
2850
2851 idx = ia64_fetch_and_add(1, &psb->psb_index);
2852 DBprintk_ovfl(("recording index=%ld entries=%ld\n", idx-1, psb->psb_entries));
2853
2854 /*
2855 * XXX: there is a small chance that we could run out on index before resetting
2856 * but index is unsigned long, so it will take some time.....
2857 * We use > instead of == because fetch_and_add() is off by one (see below)
2858 *
2859 * This case can happen in non-blocking mode or with multiple processes.
2860 * For non-blocking, we need to reload and continue.
2861 */
2862 if (idx > psb->psb_entries) return 0;
2863
2864 /* first entry is really entry 0, not 1 caused by fetch_and_add */
2865 idx--;
2866
2867 h = (perfmon_smpl_entry_t *)(((char *)psb->psb_addr) + idx*(psb->psb_entry_size));
2868
2869 /*
2870 * initialize entry header
2871 */
2872 h->pid = ctx->ctx_fl_system ? current->pid : task->pid;
2873 h->cpu = smp_processor_id();
2874 h->last_reset_value = ovfl_mask ? ctx->ctx_soft_pmds[ffz(~ovfl_mask)].lval : 0UL;
2875 h->ip = regs ? regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3): 0x0UL;
2876 h->regs = ovfl_mask; /* which registers overflowed */
2877
2878 /* guaranteed to monotonically increase on each cpu */
2879 h->stamp = pfm_get_stamp();
2880
2881 /* position for first pmd */
2882 e = (unsigned long *)(h+1);
2883
2884 /*
2885 * selectively store PMDs in increasing index number
2886 */
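	/*
	 * ctx_smpl_regs[0] is a bitmask: bit j set means PMD j is recorded in
	 * this entry. Counting PMDs are read through their 64-bit software
	 * view, all others straight from the hardware.
	 */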
2887 m = ctx->ctx_smpl_regs[0];
2888 for (j=0; m; m >>=1, j++) {
2889
2890 if ((m & 0x1) == 0) continue;
2891
2892 if (PMD_IS_COUNTING(j)) {
2893 *e = pfm_read_soft_counter(ctx, j);
2894 } else {
2895 *e = ia64_get_pmd(j); /* slow */
2896 }
2897 DBprintk_ovfl(("e=%p pmd%d =0x%lx\n", (void *)e, j, *e));
2898 e++;
2899 }
2900 pfm_stats[smp_processor_id()].pfm_recorded_samples_count++;
2901
2902 /*
2903 * make the new entry visible to user, needs to be atomic
2904 */
2905 ia64_fetch_and_add(1, &psb->psb_hdr->hdr_count);
2906
2907 DBprintk_ovfl(("index=%ld entries=%ld hdr_count=%ld\n",
2908 idx, psb->psb_entries, psb->psb_hdr->hdr_count));
2909 /*
2910 * sampling buffer full ?
2911 */
2912 if (idx == (psb->psb_entries-1)) {
2913 DBprintk_ovfl(("sampling buffer full\n"));
2914 /*
2915 * XXX: must reset buffer in blocking mode and lost notified
2916 */
2917 pfm_stats[smp_processor_id()].pfm_full_smpl_buffer_count++;
2918 return 1;
2919 }
2920 return 0;
2921 }
2922
2923 /*
2924 * main overflow processing routine.
2925  * it can be called from the interrupt path or explicitly during the context switch code
2926 * Return:
2927 * new value of pmc[0]. if 0x0 then unfreeze, else keep frozen
2928 */
2929 static unsigned long
2930 pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
2931 {
2932 unsigned long mask;
2933 struct thread_struct *t;
2934 unsigned long old_val;
2935 unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL;
2936 int i;
2937 int ret = 1;
2938 /*
2939 	 * It is never safe to access the task for which the overflow interrupt is destined
2940 * using the current variable as the interrupt may occur in the middle of a context switch
2941 * where current does not hold the task that is running yet.
2942 *
2943 * For monitoring, however, we do need to get access to the task which caused the overflow
2944 * to account for overflow on the counters.
2945 *
2946 * We accomplish this by maintaining a current owner of the PMU per CPU. During context
2947 * switch the ownership is changed in a way such that the reflected owner is always the
2948 * valid one, i.e. the one that caused the interrupt.
2949 */
2950
2951 t = &task->thread;
2952
2953 /*
2954 * XXX: debug test
2955 * Don't think this could happen given upfront tests
2956 */
2957 if ((t->flags & IA64_THREAD_PM_VALID) == 0 && ctx->ctx_fl_system == 0) {
2958 printk(KERN_DEBUG "perfmon: Spurious overflow interrupt: process %d not "
2959 "using perfmon\n", task->pid);
2960 return 0x1;
2961 }
2962 /*
2963 * sanity test. Should never happen
2964 */
2965 if ((pmc0 & 0x1) == 0) {
2966 printk(KERN_DEBUG "perfmon: pid %d pmc0=0x%lx assumption error for freeze bit\n",
2967 task->pid, pmc0);
2968 return 0x0;
2969 }
2970
2971 mask = pmc0 >> PMU_FIRST_COUNTER;
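	/*
	 * pmc0 carries one overflow status bit per counter starting at bit
	 * PMU_FIRST_COUNTER, so after the shift bit 0 of mask corresponds to
	 * the first counting PMD and the loop below walks them in order.
	 */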
2972
2973 DBprintk_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s"
2974 " mode used_pmds=0x%lx used_pmcs=0x%lx reload_pmcs=0x%lx\n",
2975 pmc0, task->pid, (regs ? regs->cr_iip : 0),
2976 CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
2977 ctx->ctx_used_pmds[0],
2978 ctx->ctx_used_pmcs[0],
2979 ctx->ctx_reload_pmcs[0]));
2980
2981 /*
2982 * First we update the virtual counters
2983 */
2984 for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
2985
2986 /* skip pmd which did not overflow */
2987 if ((mask & 0x1) == 0) continue;
2988
2989 DBprintk_ovfl(("pmd[%d] overflowed hw_pmd=0x%lx soft_pmd=0x%lx\n",
2990 i, ia64_get_pmd(i), ctx->ctx_soft_pmds[i].val));
2991
2992 /*
2993 * Note that the pmd is not necessarily 0 at this point as qualified events
2994 * may have happened before the PMU was frozen. The residual count is not
2995 * taken into consideration here but will be with any read of the pmd via
2996 * pfm_read_pmds().
2997 */
2998 old_val = ctx->ctx_soft_pmds[i].val;
2999 ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val;
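		/*
		 * each hardware overflow accounts for ovfl_val+1 counts, i.e. one
		 * full wrap of the hardware counter. If the 64-bit software value
		 * itself wraps around (new value below old_val), a virtual 64-bit
		 * overflow occurred and notification/reset must be considered.
		 */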
3000
3001 /*
3002 * check for overflow condition
3003 */
3004 if (old_val > ctx->ctx_soft_pmds[i].val) {
3005
3006 ovfl_pmds |= 1UL << i;
3007
3008 if (PMC_OVFL_NOTIFY(ctx, i)) {
3009 ovfl_notify |= 1UL << i;
3010 }
3011 } else {
3012 /*
3013 * clear top bits (maintain counts in lower part, may not always be zero)
3014 */
3015 ia64_set_pmd(i, ia64_get_pmd(i) & pmu_conf.ovfl_val);
3016 }
3017 DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
3018 i, ctx->ctx_soft_pmds[i].val, old_val,
3019 ia64_get_pmd(i) & pmu_conf.ovfl_val, ovfl_pmds, ovfl_notify));
3020 }
3021
3022 /*
3023 * check for sampling buffer
3024 *
3025 * if present, record sample only when a 64-bit counter has overflowed.
3026 * We propagate notification ONLY when buffer becomes full.
3027 */
3028 	if (CTX_HAS_SMPL(ctx) && ovfl_pmds) {
3029 ret = pfm_record_sample(task, ctx, ovfl_pmds, regs);
3030 if (ret == 1) {
3031 /*
3032 * Sampling buffer became full
3033 			 * If no notification was requested, then we reset buffer index
3034 * and reset registers (done below) and resume.
3035 * If notification requested, then defer reset until pfm_restart()
3036 */
3037 if (ovfl_notify == 0UL) {
3038 ctx->ctx_psb->psb_hdr->hdr_count = 0UL;
3039 ctx->ctx_psb->psb_index = 0UL;
3040 }
3041 } else {
3042 /*
3043 * sample recorded in buffer, no need to notify user
3044 */
3045 ovfl_notify = 0UL;
3046 }
3047 }
3048
3049 /*
3050 * No overflow requiring a user level notification
3051 */
3052 if (ovfl_notify == 0UL) {
3053 if (ovfl_pmds)
3054 pfm_reset_regs(ctx, &ovfl_pmds, PFM_PMD_SHORT_RESET);
3055 return 0x0;
3056 }
3057
3058 /*
3059 * keep track of what to reset when unblocking
3060 */
3061 ctx->ctx_ovfl_regs[0] = ovfl_pmds;
3062
3063 /*
3064 * As a consequence of the overflow, we always resume
3065 * with monitoring turned off. pfm_restart() will
3066 * reactivate.
3067 */
3068 ctx->ctx_fl_frozen = 1;
3069
3070 /*
3071 * we have come to this point because there was an overflow and that notification
3072 * was requested. The notify_task may have disappeared, in which case notify_task
3073 * is NULL.
3074 */
3075 LOCK_CTX(ctx);
3076
3077 if (ctx->ctx_notify_task) {
3078 if (CTX_OVFL_NOBLOCK(ctx) == 0 && ctx->ctx_notify_task != task) {
3079 t->pfm_ovfl_block_reset = 1; /* will cause blocking */
3080 } else {
3081 t->pfm_ovfl_block_reset = 0;
3082 }
3083
3084 DBprintk_ovfl(("[%d] scheduling tasklet\n", current->pid));
3085
3086 /*
3087 * the tasklet is responsible for sending the notification
3088 * not the PMU owner nor the current task.
3089 */
3090 tasklet_schedule(&ctx->ctx_tasklet);
3091
3092 } else {
3093 DBprintk_ovfl(("notification task has disappeared !\n"));
3094 t->pfm_ovfl_block_reset = 0;
3095 }
3096
3097 UNLOCK_CTX(ctx);
3098
3099 DBprintk_ovfl(("return pmc0=0x%x must_block=%ld\n",
3100 ctx->ctx_fl_frozen ? 0x1 : 0x0, t->pfm_ovfl_block_reset));
3101
3102 return ctx->ctx_fl_frozen ? 0x1 : 0x0;
3103 }
3104
3105 static void
3106 pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
3107 {
3108 u64 pmc0;
3109 struct task_struct *task;
3110 pfm_context_t *ctx;
3111
3112 pfm_stats[smp_processor_id()].pfm_ovfl_intr_count++;
3113
3114 /*
3115 * if an alternate handler is registered, just bypass the default one
3116 */
3117 if (pfm_alternate_intr_handler) {
3118 (*pfm_alternate_intr_handler->handler)(irq, arg, regs);
3119 return;
3120 }
3121
3122 /*
3123 * srlz.d done before arriving here
3124 *
3125 * This is slow
3126 */
3127 pmc0 = ia64_get_pmc(0);
3128 task = PMU_OWNER();
3129 /*
3130 * if we have some pending bits set
3131 	 * assumes : if any PMC[0].bit[63-1] is set, then PMC[0].fr = 1
3132 */
3133 if (PMC0_HAS_OVFL(pmc0) && task) {
3134 /*
3135 * we assume that pmc0.fr is always set here
3136 */
3137 ctx = PFM_GET_CTX(task);
3138
3139 /* sanity check */
3140 if (!ctx) {
3141 printk(KERN_DEBUG "perfmon: Spurious overflow interrupt: process %d has "
3142 "no PFM context\n", task->pid);
3143 return;
3144 }
3145 /*
3146 * assume PMC[0].fr = 1 at this point
3147 */
3148 pmc0 = pfm_overflow_handler(task, ctx, pmc0, regs);
3149
3150 /*
3151 * we can only update pmc0 when the overflow
3152 * is for the current context or we are in system
3153 * wide mode. In UP (per-task) the current
3154 * task may not be the one owning the PMU,
3155 * same thing for system-wide.
3156 */
3157 if (task == current || ctx->ctx_fl_system) {
3158 /*
3159 * We always clear the overflow status bits and either unfreeze
3160 * or keep the PMU frozen.
3161 */
3162 ia64_set_pmc(0, pmc0);
3163 ia64_srlz_d();
3164 } else {
3165 task->thread.pmc[0] = pmc0;
3166 }
3167 } else {
3168 pfm_stats[smp_processor_id()].pfm_spurious_ovfl_intr_count++;
3169 }
3170 }
3171
3172 #define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1)
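/*
 * sentinel returned as the seq_file iterator value for the header line;
 * positions 1..NR_CPUS map to CPU (pos - 1) when that CPU is online.
 */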
3173
3174 static void *
3175 pfm_proc_start(struct seq_file *m, loff_t *pos)
3176 {
3177 if (*pos == 0) {
3178 return PFM_PROC_SHOW_HEADER;
3179 }
3180
3181 while (*pos <= NR_CPUS) {
3182 if (cpu_online(*pos - 1)) {
3183 return (void *)*pos;
3184 }
3185 ++*pos;
3186 }
3187 return NULL;
3188 }
3189
3190 static void *
3191 pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
3192 {
3193 ++*pos;
3194 return pfm_proc_start(m, pos);
3195 }
3196
3197 static void
3198 pfm_proc_stop(struct seq_file *m, void *v)
3199 {
3200 }
3201
3202 static void
3203 pfm_proc_show_header(struct seq_file *m)
3204 {
3205 seq_printf(m,
3206 "perfmon version : %u.%u\n"
3207 "fastctxsw : %s\n"
3208 "ovfl_mask : 0x%lx\n",
3209 PFM_VERSION_MAJ, PFM_VERSION_MIN,
3210 pfm_sysctl.fastctxsw > 0 ? "Yes": "No",
3211 pmu_conf.ovfl_val);
3212
3213 LOCK_PFS();
3214
3215 seq_printf(m,
3216 "proc_sessions : %u\n"
3217 "sys_sessions : %u\n"
3218 "sys_use_dbregs : %u\n"
3219 "ptrace_use_dbregs : %u\n",
3220 pfm_sessions.pfs_task_sessions,
3221 pfm_sessions.pfs_sys_sessions,
3222 pfm_sessions.pfs_sys_use_dbregs,
3223 pfm_sessions.pfs_ptrace_use_dbregs);
3224
3225 UNLOCK_PFS();
3226 }
3227
3228 static int
3229 pfm_proc_show(struct seq_file *m, void *v)
3230 {
3231 int cpu;
3232
3233 if (v == PFM_PROC_SHOW_HEADER) {
3234 pfm_proc_show_header(m);
3235 return 0;
3236 }
3237
3238 /* show info for CPU (v - 1) */
3239
3240 cpu = (long)v - 1;
3241 seq_printf(m,
3242 "CPU%-2d overflow intrs : %lu\n"
3243 "CPU%-2d spurious intrs : %lu\n"
3244 "CPU%-2d recorded samples : %lu\n"
3245 "CPU%-2d smpl buffer full : %lu\n"
3246 "CPU%-2d syst_wide : %d\n"
3247 "CPU%-2d dcr_pp : %d\n"
3248 "CPU%-2d exclude idle : %d\n"
3249 "CPU%-2d owner : %d\n"
3250 "CPU%-2d activations : %lu\n",
3251 cpu, pfm_stats[cpu].pfm_ovfl_intr_count,
3252 cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count,
3253 cpu, pfm_stats[cpu].pfm_recorded_samples_count,
3254 cpu, pfm_stats[cpu].pfm_full_smpl_buffer_count,
3255 cpu, cpu_data(cpu)->pfm_syst_info & PFM_CPUINFO_SYST_WIDE ? 1 : 0,
3256 cpu, cpu_data(cpu)->pfm_syst_info & PFM_CPUINFO_DCR_PP ? 1 : 0,
3257 cpu, cpu_data(cpu)->pfm_syst_info & PFM_CPUINFO_EXCL_IDLE ? 1 : 0,
3258 cpu, pmu_owners[cpu].owner ? pmu_owners[cpu].owner->pid: -1,
3259 cpu, pmu_owners[cpu].activation_number);
3260
3261 return 0;
3262 }
3263
3264 struct seq_operations pfm_seq_ops = {
3265 .start = pfm_proc_start,
3266 .next = pfm_proc_next,
3267 .stop = pfm_proc_stop,
3268 .show = pfm_proc_show
3269 };
3270
3271 static int
3272 pfm_proc_open(struct inode *inode, struct file *file)
3273 {
3274 return seq_open(file, &pfm_seq_ops);
3275 }
3276
3277 /*
3278 * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
3279 * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
3280 * is active or inactive based on mode. We must rely on the value in
3281 * local_cpu_data->pfm_syst_info
3282 */
3283 void
3284 pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
3285 {
3286 struct pt_regs *regs;
3287 unsigned long dcr;
3288 unsigned long dcr_pp;
3289
3290 dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
3291
3292 /*
3293 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
3294 * on every CPU, so we can rely on the pid to identify the idle task.
3295 */
3296 if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
3297 regs = (struct pt_regs *)((unsigned long) task + IA64_STK_OFFSET);
3298 regs--;
3299 ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
3300 return;
3301 }
3302 /*
3303 * we are the idle task and there is exclusion.
3304 *
3305 * if monitoring has started
3306 */
3307 if (dcr_pp) {
3308 dcr = ia64_get_dcr();
3309 /*
3310 * context switching in?
3311 */
3312 if (is_ctxswin) {
3313 /* mask monitoring for the idle task */
3314 ia64_set_dcr(dcr & ~IA64_DCR_PP);
3315 pfm_clear_psr_pp();
3316 ia64_srlz_i();
3317 return;
3318 }
3319 /*
3320 * context switching out
3321 * restore normal kernel level settings
3322 *
3323 * Due to inlining this odd if-then-else construction generates
3324 * better code.
3325 */
3326 ia64_set_dcr(dcr |IA64_DCR_PP);
3327 pfm_set_psr_pp();
3328 ia64_srlz_i();
3329 }
3330 }
3331
3332 #ifdef CONFIG_SMP
3333 void
3334 pfm_save_regs(struct task_struct *task)
3335 {
3336 pfm_context_t *ctx;
3337 struct thread_struct *t;
3338 u64 psr;
3339
3340 ctx = PFM_GET_CTX(task);
3341 if (ctx == NULL) goto save_error;
3342 t = &task->thread;
3343
3344 /*
3345 * sanity check
3346 */
3347 if (ctx->ctx_last_activation != GET_ACTIVATION()) {
3348 DBprintk(("ctx_activation=%lu activation=%lu: no save\n",
3349 ctx->ctx_last_activation, GET_ACTIVATION()));
3350 return;
3351 }
3352
3353 /*
3354 * save current PSR: needed because we modify it
3355 */
3356 psr = pfm_get_psr();
3357
3358 /*
3359 * stop monitoring:
3360 * This is the last instruction which may generate an overflow
3361 *
3362 	 * We do not need to set psr.sp because it is irrelevant in kernel.
3363 * It will be restored from ipsr when going back to user level
3364 */
3365 pfm_clear_psr_up();
3366
3367 /*
3368 * keep a copy of the saved psr (for reload)
3369 */
3370 ctx->ctx_saved_psr = psr;
3371
3372 /*
3373 * release ownership of this PMU.
3374 */
3375 SET_PMU_OWNER(NULL);
3376
3377 /*
3378 	 * we systematically save the PMDs as we have no
3379 	 * guarantee we will be scheduled on that same
3380 	 * CPU again.
3381 */
3382 pfm_save_pmds(t->pmd, ctx->ctx_used_pmds[0]);
3383
3384 /*
3385 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
3386 * we will need it on the restore path to check
3387 * for pending overflow.
3388 */
3389 t->pmc[0] = ia64_get_pmc(0);
3390
3391 return;
3392
3393 save_error:
3394 printk(KERN_ERR "perfmon: pfm_save_regs CPU%d [%d] NULL context PM_VALID=%ld\n",
3395 smp_processor_id(), task->pid,
3396 task->thread.flags & IA64_THREAD_PM_VALID);
3397 }
3398
3399 #else /* !CONFIG_SMP */
3400
3401 void
3402 pfm_save_regs(struct task_struct *task)
3403 {
3404 pfm_context_t *ctx;
3405 u64 psr;
3406
3407 ctx = PFM_GET_CTX(task);
3408 if (ctx == NULL) goto save_error;
3409 /*
3410 * save current PSR: needed because we modify it
3411 */
3412 psr = pfm_get_psr();
3413
3414 /*
3415 * stop monitoring:
3416 * This is the last instruction which may generate an overflow
3417 *
3418 	 * We do not need to set psr.sp because it is irrelevant in kernel.
3419 * It will be restored from ipsr when going back to user level
3420 */
3421 pfm_clear_psr_up();
3422
3423 /*
3424 * keep a copy of the saved psr (for reload)
3425 */
3426 ctx->ctx_saved_psr = psr;
3427
3428 return;
3429 save_error:
3430 printk(KERN_ERR "perfmon: pfm_save_regs CPU%d [%d] NULL context PM_VALID=%ld\n",
3431 smp_processor_id(), task->pid,
3432 task->thread.flags & IA64_THREAD_PM_VALID);
3433 }
3434
3435 static unsigned long
3436 pfm_lazy_save_regs (struct task_struct *task)
3437 {
3438 pfm_context_t *ctx;
3439 struct thread_struct *t;
3440
3441 ctx = PFM_GET_CTX(task);
3442 t = &task->thread;
3443
3444 DBprintk(("on [%d] used_pmds=0x%lx\n", task->pid, ctx->ctx_used_pmds[0]));
3445
3446 /*
3447 * release ownership of this PMU.
3448 * must be done before we save the registers.
3449 *
3450 * after this call any PMU interrupt is treated
3451 * as spurious.
3452 */
3453 SET_PMU_OWNER(NULL);
3454
3455 /*
3456 * save all the pmds we use
3457 */
3458 pfm_save_pmds(t->pmd, ctx->ctx_used_pmds[0]);
3459
3460 /*
3461 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
3462 	 * it is needed to check for pending overflow
3463 * on the restore path
3464 */
3465 t->pmc[0] = ia64_get_pmc(0);
3466
3467 return t->pmc[0];
3468 }
3469 #endif /* CONFIG_SMP */
3470
3471 #ifdef CONFIG_SMP
3472 void
3473 pfm_load_regs (struct task_struct *task)
3474 {
3475 pfm_context_t *ctx;
3476 struct thread_struct *t;
3477 struct task_struct *owner;
3478 unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
3479 u64 psr;
3480
3481 ctx = PFM_GET_CTX(task);
3482 if (unlikely(ctx == NULL)) {
3483 printk(KERN_ERR "perfmon: pfm_load_regs() null context\n");
3484 return;
3485 }
3486
3487 owner = PMU_OWNER();
3488 t = &task->thread;
3489
3490 /*
3491 * possible on unload
3492 */
3493 if ((t->flags & IA64_THREAD_PM_VALID) == 0) {
3494 DBprintk(("[%d] PM_VALID=0, nothing to do\n", task->pid));
3495 return;
3496 }
3497
3498 /*
3499 * we restore ALL the debug registers to avoid picking up
3500 * stale state.
3501 *
3502 * This must be done even when the task is still the owner
3503 * as the registers may have been modified via ptrace()
3504 * (not perfmon) by the previous task.
3505 */
3506 if (ctx->ctx_fl_using_dbreg) {
3507 pfm_restore_ibrs(t->ibr, pmu_conf.num_ibrs);
3508 pfm_restore_dbrs(t->dbr, pmu_conf.num_dbrs);
3509 }
3510
3511 /*
3512 * retrieve saved psr
3513 */
3514 psr = ctx->ctx_saved_psr;
3515
3516 /*
3517 * if we were the last user of the PMU on that CPU,
3518 * then nothing to do except restore psr
3519 */
3520 if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {
3521 /*
3522 * retrieve partial reload masks (due to user modifications)
3523 */
3524 pmc_mask = 0UL;
3525 pmd_mask = 0UL;
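		/*
		 * no partial-reload bookkeeping is maintained in this version, so
		 * both masks stay empty: when the context is still live on this
		 * CPU nothing is reloaded and only psr is restored below.
		 */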
3526
3527 if (pmc_mask || pmd_mask) DBprintk(("partial reload [%d] pmd_mask=0x%lx pmc_mask=0x%lx\n", task->pid, pmd_mask, pmc_mask));
3528 } else {
3529 /*
3530 * To avoid leaking information to the user level when psr.sp=0,
3531 * we must reload ALL implemented pmds (even the ones we don't use).
3532 * In the kernel we only allow PFM_READ_PMDS on registers which
3533 * we initialized or requested (sampling) so there is no risk there.
3534 */
3535 pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_reload_pmds[0];
3536
3537 /*
3538 * ALL accessible PMCs are systematically reloaded, unused registers
3539 * get their default (from pfm_reset_pmu_state()) values to avoid picking
3540 * up stale configuration.
3541 *
3542 * PMC0 is never in the mask. It is always restored separately.
3543 */
3544 pmc_mask = ctx->ctx_reload_pmcs[0];
3545
3546 DBprintk(("full reload for [%d] owner=%d activation=%lu last_activation=%lu last_cpu=%d pmd_mask=0x%lx pmc_mask=0x%lx\n",
3547 task->pid, owner ? owner->pid : -1,
3548 GET_ACTIVATION(), ctx->ctx_last_activation,
3549 GET_LAST_CPU(ctx), pmd_mask, pmc_mask));
3550
3551 }
3552
3553 if (pmd_mask) pfm_restore_pmds(t->pmd, pmd_mask);
3554 if (pmc_mask) pfm_restore_pmcs(t->pmc, pmc_mask);
3555
3556 /*
3557 * check for pending overflow at the time the state
3558 * was saved.
3559 */
3560 if (PMC0_HAS_OVFL(t->pmc[0])) {
3561 struct pt_regs *regs = TASK_PTREGS(task);
3562 pfm_overflow_handler(task, ctx, t->pmc[0], regs);
3563 }
3564
3565 /*
3566 * fl_frozen==1 when we are in blocking mode waiting for restart
3567 */
3568 if (ctx->ctx_fl_frozen == 0) {
3569 pfm_unfreeze_pmu();
3570 }
3571
3572 SET_LAST_CPU(ctx, smp_processor_id());
3573
3574 /*
3575 	 * bump activation value for this PMU
3576 */
3577 INC_ACTIVATION();
3578 /*
3579 * record current activation for this context
3580 */
3581 SET_ACTIVATION(ctx);
3582
3583 /*
3584 * establish new ownership. Interrupts
3585 * are still masked at this point.
3586 */
3587 SET_PMU_OWNER(task);
3588
3589 /*
3590 * restore the psr we changed
3591 */
3592 pfm_set_psr_l(psr);
3593
3594 }
3595 #else /* !CONFIG_SMP */
3596 /*
3597 * reload PMU state for UP kernels
3598 */
3599 void
3600 pfm_load_regs (struct task_struct *task)
3601 {
3602 struct thread_struct *t;
3603 pfm_context_t *ctx;
3604 struct task_struct *owner;
3605 unsigned long pmd_mask, pmc_mask;
3606 unsigned long prev_pmc0 = ~0UL;
3607 u64 psr;
3608
3609 owner = PMU_OWNER();
3610 ctx = PFM_GET_CTX(task);
3611 t = &task->thread;
3612
3613 /*
3614 * we restore ALL the debug registers to avoid picking up
3615 * stale state.
3616 *
3617 * This must be done even when the task is still the owner
3618 * as the registers may have been modified via ptrace()
3619 * (not perfmon) by the previous task.
3620 */
3621 if (ctx->ctx_fl_using_dbreg) {
3622 pfm_restore_ibrs(t->ibr, pmu_conf.num_ibrs);
3623 pfm_restore_dbrs(t->dbr, pmu_conf.num_dbrs);
3624 }
3625
3626 /*
3627 	 * retrieve saved psr
3628 */
3629 psr = ctx->ctx_saved_psr;
3630
3631 /*
3632 * short path, our state is still there, just
3633 * need to restore psr and we go
3634 *
3635 * we do not touch either PMC nor PMD. the psr is not touched
3636 * by the overflow_handler. So we are safe w.r.t. to interrupt
3637 * concurrency even without interrupt masking.
3638 */
3639 if (owner == task) {
3640 pfm_set_psr_l(psr);
3641 return;
3642 }
3643
3644 DBprintk(("reload for [%d] owner=%d\n", task->pid, owner ? owner->pid : -1));
3645
3646 /*
3647 * someone else is still using the PMU, first push it out and
3648 * then we'll be able to install our stuff !
3649 *
3650 * Upon return, there will be no owner for the current PMU
3651 */
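	/*
	 * pfm_lazy_save_regs() pushes the previous owner's PMU state into its
	 * save area; judging by the check further down, its return value is the
	 * PMC0 image it observed (0UL meaning the previous owner had left the
	 * PMU unfrozen).
	 */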
3652 if (owner) prev_pmc0 = pfm_lazy_save_regs(owner);
3653 /*
3654 * To avoid leaking information to the user level when psr.sp=0,
3655 * we must reload ALL implemented pmds (even the ones we don't use).
3656 * In the kernel we only allow PFM_READ_PMDS on registers which
3657 * we initialized or requested (sampling) so there is no risk there.
3658 */
3659 pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_reload_pmds[0];
3660
3661 /*
3662 * ALL accessible PMCs are systematically reloaded, unused registers
3663 * get their default (from pfm_reset_pmu_state()) values to avoid picking
3664 * up stale configuration.
3665 *
3666 * PMC0 is never in the mask. It is always restored separately.
3667 */
3668 pmc_mask = ctx->ctx_reload_pmcs[0];
3669
3670 pfm_restore_pmds(t->pmd, pmd_mask);
3671 pfm_restore_pmcs(t->pmc, pmc_mask);
3672
3673 /*
3674 	 * Check for overflows that were pending when the state was last saved;
3675 	 * the overflow handler is invoked if any overflow status bits are set.
3676 *
3677 * Any PMU overflow in flight at this point, will still
3678 * be treated as spurious because we have no declared
3679 * owner. Note that the first level interrupt handler
3680 * DOES NOT TOUCH any PMC except PMC0 for which we have
3681 * a copy already.
3682 */
3683 if (PMC0_HAS_OVFL(t->pmc[0])) {
3684 struct pt_regs *regs = TASK_PTREGS(task);
3685 pfm_overflow_handler(task, ctx, t->pmc[0], regs);
3686 }
3687
3690 /*
3691 * fl_frozen==1 when we are in blocking mode waiting for restart
3692 */
3693 if (ctx->ctx_fl_frozen == 0) {
3694 pfm_unfreeze_pmu();
3695 } else if (prev_pmc0 == 0UL && ctx->ctx_fl_frozen) {
3696 /*
3697 * owner is still NULL at this point.
3698 *
3699 * if the previous owner (from lazy_save_regs())
3700 * was not in frozen state, then we need to freeze
3701 * the PMU if the new context is frozen.
3702 *
3703 * on McKinley this will generate a spurious interrupt
3704 * but we have no other way.
3705 */
3706 pfm_freeze_pmu();
3707 }
3708
3709 /*
3710 * establish new ownership. If there was an in-flight
3711 * overflow interrupt, it will be treated as spurious
3712 * before and after the call, because no overflow
3713 * status bit can possibly be set. No new overflow
3714 * can be generated because, at this point, psr.up
3715 * is still cleared.
3716 */
3717 SET_PMU_OWNER(task);
3718
3719 /*
3720 * restore the psr. This is the point at which
3721 * new overflow interrupts can be generated again.
3722 */
3723 pfm_set_psr_l(psr);
3724 }
3725 #endif /* CONFIG_SMP */
3726
3727 /*
3728 * XXX: make this routine able to work with non current context
3729 */
3730 static void
3731 pfm_reset_pmu(struct task_struct *task)
3732 {
3733 struct thread_struct *t = &task->thread;
3734 pfm_context_t *ctx = t->pfm_context;
3735 int i;
3736
3737 if (task != current) {
3738 		printk(KERN_ERR "perfmon: invalid task in pfm_reset_pmu()\n");
3739 return;
3740 }
3741
3742 /* Let's make sure the PMU is frozen */
3743 pfm_freeze_pmu();
3744
3745 /*
3746 * install reset values for PMC. We skip PMC0 (done above)
3747 	 * XXX: good up to 64 PMCs
3748 */
3749 for (i=1; (pmu_conf.pmc_desc[i].type & PFM_REG_END) == 0; i++) {
3750 if ((pmu_conf.pmc_desc[i].type & PFM_REG_IMPL) == 0) continue;
3751 ia64_set_pmc(i, PMC_DFL_VAL(i));
3752 /*
3753 * When restoring context, we must restore ALL pmcs, even the ones
3754 * that the task does not use to avoid leaks and possibly corruption
3755 	 * of the session because of configuration conflicts. So here, we
3756 * initialize the entire set used in the context switch restore routine.
3757 */
3758 t->pmc[i] = PMC_DFL_VAL(i);
3759 DBprintk(("pmc[%d]=0x%lx\n", i, t->pmc[i]));
3760 }
3761
3762 /*
3763 * clear reset values for PMD.
3764 	 * XXX: good up to 64 PMDs.
3765 */
3766 for (i=0; (pmu_conf.pmd_desc[i].type & PFM_REG_END) == 0; i++) {
3767 if ((pmu_conf.pmd_desc[i].type & PFM_REG_IMPL) == 0) continue;
3768 ia64_set_pmd(i, 0UL);
3769 t->pmd[i] = 0UL;
3770 }
3771
3772 /*
3773 * On context switched restore, we must restore ALL pmc and ALL pmd even
3774 * when they are not actively used by the task. In UP, the incoming process
3775 * may otherwise pick up left over PMC, PMD state from the previous process.
3776 * As opposed to PMD, stale PMC can cause harm to the incoming
3777 * process because they may change what is being measured.
3778 * Therefore, we must systematically reinstall the entire
3779 * PMC state. In SMP, the same thing is possible on the
3780 	 * same CPU but also between 2 CPUs.
3781 *
3782 * The problem with PMD is information leaking especially
3783 * to user level when psr.sp=0
3784 *
3785 * There is unfortunately no easy way to avoid this problem
3786 	 * on either UP or SMP. This definitely slows down the
3787 * pfm_load_regs() function.
3788 */
3789
3790 /*
3791 * We must include all the PMC in this mask to make sure we don't
3792 * see any side effect of a stale state, such as opcode matching
3793 * or range restrictions, for instance.
3794 *
3795 * We never directly restore PMC0 so we do not include it in the mask.
3796 */
3797 ctx->ctx_reload_pmcs[0] = pmu_conf.impl_pmcs[0] & ~0x1;
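	/*
	 * Bit i of impl_pmcs[0] stands for PMC i, so masking with ~0x1 is what
	 * drops PMC0 from the reload set.
	 */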
3798 /*
3799 * We must include all the PMD in this mask to avoid picking
3800 * up stale value and leak information, especially directly
3801 * at the user level when psr.sp=0
3802 */
3803 ctx->ctx_reload_pmds[0] = pmu_conf.impl_pmds[0];
3804
3805 /*
3806 * Keep track of the pmds we want to sample
3807 	 * XXX: maybe we don't need to save/restore the DEAR/IEAR pmds
3808 * but we do need the BTB for sure. This is because of a hardware
3809 * buffer of 1 only for non-BTB pmds.
3810 *
3811 * We ignore the unimplemented pmds specified by the user
3812 */
3813 ctx->ctx_used_pmds[0] = ctx->ctx_smpl_regs[0];
3814 ctx->ctx_used_pmcs[0] = 1; /* always save/restore PMC[0] */
3815
3816 /*
3817 * useful in case of re-enable after disable
3818 */
3819 ctx->ctx_used_ibrs[0] = 0UL;
3820 ctx->ctx_used_dbrs[0] = 0UL;
3821
3822 ia64_srlz_d();
3823 }
3824
3825 /*
3826 * This function is called when a thread exits (from exit_thread()).
3827 * This is a simplified pfm_save_regs() that simply flushes the current
3828 * register state into the save area taking into account any pending
3829 * overflow. This time no notification is sent because the task is dying
3830  * anyway. The inline processing of overflows avoids losing some counts.
3831  * The PMU is frozen on exit from this call and is never to be re-enabled
3832 * again for this task.
3833 *
3834 */
3835 void
3836 pfm_flush_regs (struct task_struct *task)
3837 {
3838 pfm_context_t *ctx;
3839 u64 pmc0;
3840 unsigned long mask2, val;
3841 int i;
3842
3843 ctx = task->thread.pfm_context;
3844
3845 if (ctx == NULL) return;
3846
3847 /*
3848 * that's it if context already disabled
3849 */
3850 if (ctx->ctx_flags.state == PFM_CTX_DISABLED) return;
3851
3852 /*
3853 * stop monitoring:
3854 * This is the only way to stop monitoring without destroying overflow
3855 * information in PMC[0].
3856 * This is the last instruction which can cause overflow when monitoring
3857 * in kernel.
3858 	 * At this point, an overflow interrupt could still be in-flight.
3859 */
3860 if (ctx->ctx_fl_system) {
3861
3862 /* disable dcr pp */
3863 ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
3864
3865 /* stop monitoring */
3866 pfm_clear_psr_pp();
3867 ia64_srlz_i();
3868
3869 PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
3870 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
3871 PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
3872 } else {
3873
3874 /* stop monitoring */
3875 pfm_clear_psr_up();
3876 ia64_srlz_i();
3877
3878 /* no more save/restore on ctxsw */
3879 current->thread.flags &= ~IA64_THREAD_PM_VALID;
3880 }
3881
3882 /*
3883 * Mark the PMU as not owned
3884 * This will cause the interrupt handler to do nothing in case an overflow
3885 * interrupt was in-flight
3886 * This also guarantees that pmc0 will contain the final state
3887 * It virtually gives us full control on overflow processing from that point
3888 * on.
3889 * It must be an atomic operation.
3890 */
3891 SET_PMU_OWNER(NULL);
3892
3893 /*
3894 * read current overflow status:
3895 *
3896 * we are guaranteed to read the final stable state
3897 */
3898 ia64_srlz_d();
3899 pmc0 = ia64_get_pmc(0); /* slow */
3900
3901 /*
3902 * freeze PMU:
3903 *
3904 * This destroys the overflow information. This is required to make sure
3905 * next process does not start with monitoring on if not requested
3906 */
3907 pfm_freeze_pmu();
3908
3909 /*
3910 * We don't need to restore psr, because we are on our way out
3911 */
3912
3913 /*
3914 * This loop flushes the PMD into the PFM context.
3915 * It also processes overflow inline.
3916 *
3917 * IMPORTANT: No notification is sent at this point as the process is dying.
3918 	 * The implicit notification will come from a SIGCHLD or a return from a
3919 * waitpid().
3920 *
3921 */
3922 #ifdef CONFIG_SMP
3923 if (GET_LAST_CPU(ctx) != smp_processor_id())
3924 printk(KERN_DEBUG "perfmon: [%d] last_cpu=%d\n",
3925 task->pid, GET_LAST_CPU(ctx));
3926 #endif
3927
3928 /*
3929 * we save all the used pmds
3930 * we take care of overflows for pmds used as counters
3931 */
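	/*
	 * ctx_used_pmds[0] is a bitmask indexed by PMD number (e.g. 0x30 selects
	 * pmd4 and pmd5); the loop shifts it right one bit per iteration so that
	 * i always names the register behind the current bit.
	 *
	 * For counting PMDs only the low bits (the pmu_conf.ovfl_val mask) live
	 * in hardware; the 64-bit software value accumulates them, and a set bit
	 * i in pmc0 means the counter wrapped, so one full period (ovfl_val + 1)
	 * is added on top.
	 */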
3932 mask2 = ctx->ctx_used_pmds[0];
3933 for (i = 0; mask2; i++, mask2>>=1) {
3934
3935 /* skip non used pmds */
3936 if ((mask2 & 0x1) == 0) continue;
3937
3938 val = ia64_get_pmd(i);
3939
3940 if (PMD_IS_COUNTING(i)) {
3941 DBprintk(("[%d] pmd[%d] soft_pmd=0x%lx hw_pmd=0x%lx\n",
3942 task->pid,
3943 i,
3944 ctx->ctx_soft_pmds[i].val,
3945 val & pmu_conf.ovfl_val));
3946
3947 /* collect latest results */
3948 ctx->ctx_soft_pmds[i].val += val & pmu_conf.ovfl_val;
3949
3950 /*
3951 * now everything is in ctx_soft_pmds[] and we need
3952 * to clear the saved context from save_regs() such that
3953 * pfm_read_pmds() gets the correct value
3954 */
3955 task->thread.pmd[i] = 0;
3956
3957 /*
3958 * take care of overflow inline
3959 */
3960 if (pmc0 & (1UL << i)) {
3961 ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val;
3962 DBprintk(("[%d] pmd[%d] overflowed soft_pmd=0x%lx\n",
3963 task->pid, i, ctx->ctx_soft_pmds[i].val));
3964 }
3965 } else {
3966 DBprintk(("[%d] pmd[%d] hw_pmd=0x%lx\n", task->pid, i, val));
3967 /*
3968 * not a counter, just save value as is
3969 */
3970 task->thread.pmd[i] = val;
3971 }
3972 }
3973 SET_LAST_CPU(ctx, -1);
3974 }
3975
3976
3977 /*
3978 * task is the newly created task, pt_regs for new child
3979 */
3980 int
3981 pfm_inherit(struct task_struct *task, struct pt_regs *regs)
3982 {
3983 pfm_context_t *ctx;
3984 pfm_context_t *nctx;
3985 struct thread_struct *thread;
3986 unsigned long m;
3987 int i;
3988
3989 /*
3990 * the new task was copied from parent and therefore points
3991 * to the parent's context at this point
3992 */
3993 ctx = task->thread.pfm_context;
3994 thread = &task->thread;
3995
3996 /*
3997 * for secure sessions, make sure child cannot mess up
3998 * the monitoring session.
3999 */
4000 if (ctx->ctx_fl_unsecure == 0) {
4001 ia64_psr(regs)->sp = 1;
4002 DBprintk(("enabling psr.sp for [%d]\n", task->pid));
4003 } else {
4004 DBprintk(("psr.sp=%d [%d]\n", ia64_psr(regs)->sp, task->pid));
4005 }
4006
4007
4008 /*
4009 * if there was a virtual mapping for the sampling buffer
4010 * the mapping is NOT inherited across fork() (see VM_DONTCOPY),
4011 	 * so we don't have to explicitly remove it here.
4012 *
4013 *
4014 * Part of the clearing of fields is also done in
4015 	 * copy_thread() because the fields are outside the
4016 * pfm_context structure and can affect tasks not
4017 * using perfmon.
4018 */
4019
4020 /* clear pending notification */
4021 task->thread.pfm_ovfl_block_reset = 0;
4022
4023 /*
4024 * clear cpu pinning restriction for child
4025 */
4026 if (ctx->ctx_fl_system) {
4027 task->cpus_allowed = ctx->ctx_saved_cpus_allowed;
4028 task->need_resched = 1;
4029
4030 DBprintk(("setting cpus_allowed for [%d] to 0x%lx from 0x%lx\n",
4031 task->pid,
4032 ctx->ctx_saved_cpus_allowed,
4033 current->cpus_allowed));
4034 }
4035
4036 /*
4037 * takes care of easiest case first
4038 */
4039 if (CTX_INHERIT_MODE(ctx) == PFM_FL_INHERIT_NONE) {
4040
4041 DBprintk(("removing PFM context for [%d]\n", task->pid));
4042
4043 task->thread.pfm_context = NULL;
4044
4045 /*
4046 * we must clear psr.up because the new child does
4047 * not have a context and the PM_VALID flag is cleared
4048 * in copy_thread().
4049 *
4050 * we do not clear psr.pp because it is always
4051 * controlled by the system wide logic and we should
4052 * never be here when system wide is running anyway
4053 */
4054 ia64_psr(regs)->up = 0;
4055
4056 /* copy_thread() clears IA64_THREAD_PM_VALID */
4057 return 0;
4058 }
4059 nctx = pfm_context_alloc();
4060 if (nctx == NULL) return -ENOMEM;
4061
4062 /* copy content */
4063 *nctx = *ctx;
4064
4065 if (CTX_INHERIT_MODE(ctx) == PFM_FL_INHERIT_ONCE) {
4066 nctx->ctx_fl_inherit = PFM_FL_INHERIT_NONE;
4067 DBprintk(("downgrading to INHERIT_NONE for [%d]\n", task->pid));
4068 /*
4069 * downgrade parent: once means only first child!
4070 */
4071 ctx->ctx_fl_inherit = PFM_FL_INHERIT_NONE;
4072 }
4073 /*
4074 * task is not yet visible in the tasklist, so we do
4075 * not need to lock the newly created context.
4076 * However, we must grab the tasklist_lock to ensure
4077 * that the ctx_owner or ctx_notify_task do not disappear
4078 * while we increment their check counters.
4079 */
4080 read_lock(&tasklist_lock);
4081
4082 if (nctx->ctx_notify_task)
4083 atomic_inc(&nctx->ctx_notify_task->thread.pfm_notifiers_check);
4084
4085 if (nctx->ctx_owner)
4086 atomic_inc(&nctx->ctx_owner->thread.pfm_owners_check);
4087
4088 read_unlock(&tasklist_lock);
4089
4090
4091 LOCK_PFS();
4092 pfm_sessions.pfs_task_sessions++;
4093 UNLOCK_PFS();
4094
4095 /* initialize counters in new context */
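	/*
	 * For every counting PMD inherited by the child, the last reset value
	 * (lval) is split the way the parent keeps it: the bits covered by
	 * pmu_conf.ovfl_val go into the hardware register image, the remainder
	 * into the 64-bit software counter.
	 */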
4096 m = nctx->ctx_used_pmds[0] >> PMU_FIRST_COUNTER;
4097 for(i = PMU_FIRST_COUNTER ; m ; m>>=1, i++) {
4098 if ((m & 0x1) && pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING) {
4099 nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].lval & ~pmu_conf.ovfl_val;
4100 thread->pmd[i] = nctx->ctx_soft_pmds[i].lval & pmu_conf.ovfl_val;
4101 } else {
4102 thread->pmd[i] = 0UL; /* reset to initial state */
4103 }
4104 }
4105
4106 nctx->ctx_fl_frozen = 0;
4107 nctx->ctx_ovfl_regs[0] = 0UL;
4108 SET_LAST_CPU(nctx, -1);
4109
4110 /*
4111 * here nctx->ctx_psb == ctx->ctx_psb
4112 *
4113 * increment reference count to sampling
4114 * buffer, if any. Note that this is independent
4115 * from the virtual mapping. The latter is never
4116 * inherited while the former will be if context
4117 * is setup to something different from PFM_FL_INHERIT_NONE
4118 */
4119 if (nctx->ctx_psb) {
4120 LOCK_PSB(nctx->ctx_psb);
4121
4122 nctx->ctx_psb->psb_refcnt++;
4123
4124 DBprintk(("updated smpl @ %p refcnt=%lu psb_flags=0x%x\n",
4125 ctx->ctx_psb->psb_hdr,
4126 ctx->ctx_psb->psb_refcnt,
4127 ctx->ctx_psb->psb_flags));
4128
4129 UNLOCK_PSB(nctx->ctx_psb);
4130
4131 /*
4132 * remove any pointer to sampling buffer mapping
4133 */
4134 nctx->ctx_smpl_vaddr = 0;
4135 }
4136
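	/*
	 * The restart semaphore starts out locked so that a child using blocking
	 * notification waits for its own restart rather than consuming a wakeup
	 * that was queued for the parent.
	 */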
4137 sema_init(&nctx->ctx_restart_sem, 0); /* reset this semaphore to locked */
4138
4139 /*
4140 	 * propagate the kernel psr to the new context (used for the first ctxsw in)
4141 */
4142 nctx->ctx_saved_psr = pfm_get_psr();
4143
4144 /*
4145 * force a full reload on ctxsw in
4146 */
4147 nctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
4148 SET_LAST_CPU(nctx, -1);
4149
4150 /*
4151 * initialize tasklet for signal notifications
4152 *
4153 * ALL signal-based (or any notification using data structures
4154 * external to perfmon) MUST use tasklets to avoid lock contentions
4155 * when a signal has to be sent for overflow interrupt handler.
4156 */
4157 tasklet_init(&nctx->ctx_tasklet, pfm_send_notification_signal, (unsigned long)nctx);
4158
4159 /* link with new task */
4160 thread->pfm_context = nctx;
4161
4162 DBprintk(("nctx=%p for process [%d]\n", (void *)nctx, task->pid));
4163
4164 /*
4165 * the copy_thread routine automatically clears
4166 	 * IA64_THREAD_PM_VALID, so we need to re-enable it if the caller had it set
4167 */
4168 if (current->thread.flags & IA64_THREAD_PM_VALID) {
4169 DBprintk(("setting PM_VALID for [%d]\n", task->pid));
4170 thread->flags |= IA64_THREAD_PM_VALID;
4171 }
4172 return 0;
4173 }
4174
4175 /*
4176 *
4177 * We cannot touch any of the PMU registers at this point as we may
4178 * not be running on the same CPU the task was last run on. Therefore
4179 * it is assumed that the PMU has been stopped appropriately in
4180 * pfm_flush_regs() called from exit_thread().
4181 *
4182 * The function is called in the context of the parent via a release_thread()
4183 * and wait4(). The task is not in the tasklist anymore.
4184 */
4185 void
4186 pfm_context_exit(struct task_struct *task)
4187 {
4188 pfm_context_t *ctx = task->thread.pfm_context;
4189
4190 /*
4191 * check sampling buffer
4192 */
4193 if (ctx->ctx_psb) {
4194 pfm_smpl_buffer_desc_t *psb = ctx->ctx_psb;
4195
4196 LOCK_PSB(psb);
4197
4198 DBprintk(("sampling buffer from [%d] @%p size %ld refcnt=%lu psb_flags=0x%x\n",
4199 task->pid,
4200 psb->psb_hdr, psb->psb_size, psb->psb_refcnt, psb->psb_flags));
4201
4202 /*
4203 * in the case where we are the last user, we may be able to free
4204 * the buffer
4205 */
4206 psb->psb_refcnt--;
4207
4208 if (psb->psb_refcnt == 0) {
4209
4210 /*
4211 			 * The flag is cleared in pfm_vm_close(), which gets
4212 * called from do_exit() via exit_mm().
4213 * By the time we come here, the task has no more mm context.
4214 *
4215 * We can only free the psb and buffer here after the vm area
4216 * describing the buffer has been removed. This normally happens
4217 * as part of do_exit() but the entire mm context is ONLY removed
4218 			 * once its reference count goes to zero. This is typically
4219 * the case except for multi-threaded (several tasks) processes.
4220 *
4221 * See pfm_vm_close() and pfm_cleanup_smpl_buf() for more details.
4222 */
4223 if ((psb->psb_flags & PSB_HAS_VMA) == 0) {
4224
4225 DBprintk(("cleaning sampling buffer from [%d] @%p size %ld\n",
4226 task->pid,
4227 psb->psb_hdr, psb->psb_size));
4228
4229 /*
4230 * free the buffer and psb
4231 */
4232 pfm_rvfree(psb->psb_hdr, psb->psb_size);
4233 kfree(psb);
4234 psb = NULL;
4235 }
4236 }
4237 /* psb may have been deleted */
4238 if (psb) UNLOCK_PSB(psb);
4239 }
4240
4241 DBprintk(("cleaning [%d] pfm_context @%p notify_task=%p check=%d mm=%p\n",
4242 task->pid, ctx,
4243 ctx->ctx_notify_task,
4244 atomic_read(&task->thread.pfm_notifiers_check), task->mm));
4245
4246 /*
4247 * To avoid getting the notified task or owner task scan the entire process
4248 * list when they exit, we decrement notifiers_check and owners_check respectively.
4249 *
4250 	 * Of course, there is a race condition between decreasing the value and the
4251 * task exiting. The danger comes from the fact that, in both cases, we have a
4252 * direct pointer to a task structure thereby bypassing the tasklist.
4253 * We must make sure that, if we have task!= NULL, the target task is still
4254 * present and is identical to the initial task specified
4255 * during pfm_context_create(). It may already be detached from the tasklist but
4256 * that's okay. Note that it is okay if we miss the deadline and the task scans
4257 * the list for nothing, it will affect performance but not correctness.
4258 * The correctness is ensured by using the ctx_lock which prevents the
4259 * notify_task from changing the fields in our context.
4260 	 * Once holding this lock, if we see task != NULL, then it will stay like
4261 * that until we release the lock. If it is NULL already then we came too late.
4262 */
4263 LOCK_CTX(ctx);
4264
4265 if (ctx->ctx_notify_task != NULL) {
4266 DBprintk(("[%d], [%d] atomic_sub on [%d] notifiers=%u\n", current->pid,
4267 task->pid,
4268 ctx->ctx_notify_task->pid,
4269 atomic_read(&ctx->ctx_notify_task->thread.pfm_notifiers_check)));
4270
4271 atomic_dec(&ctx->ctx_notify_task->thread.pfm_notifiers_check);
4272 }
4273
4274 if (ctx->ctx_owner != NULL) {
4275 DBprintk(("[%d], [%d] atomic_sub on [%d] owners=%u\n",
4276 current->pid,
4277 task->pid,
4278 ctx->ctx_owner->pid,
4279 atomic_read(&ctx->ctx_owner->thread.pfm_owners_check)));
4280
4281 atomic_dec(&ctx->ctx_owner->thread.pfm_owners_check);
4282 }
4283
4284 UNLOCK_CTX(ctx);
4285
4286 pfm_unreserve_session(task, ctx->ctx_fl_system, 1UL << ctx->ctx_cpu);
4287
4288 if (ctx->ctx_fl_system) {
4289 /*
4290 * remove any CPU pinning
4291 */
4292 task->cpus_allowed = ctx->ctx_saved_cpus_allowed;
4293 task->need_resched = 1;
4294 }
4295
4296 pfm_context_free(ctx);
4297 /*
4298 * clean pfm state in thread structure,
4299 */
4300 task->thread.pfm_context = NULL;
4301 task->thread.pfm_ovfl_block_reset = 0;
4302
4303 /* pfm_notifiers is cleaned in pfm_cleanup_notifiers() */
4304 }
4305
4306 /*
4307 * function invoked from release_thread when pfm_smpl_buf_list is not NULL
4308 */
4309 int
4310 pfm_cleanup_smpl_buf(struct task_struct *task)
4311 {
4312 pfm_smpl_buffer_desc_t *tmp, *psb = task->thread.pfm_smpl_buf_list;
4313
4314 if (psb == NULL) {
4315 printk(KERN_DEBUG "perfmon: psb is null in [%d]\n", current->pid);
4316 return -1;
4317 }
4318 /*
4319 * Walk through the list and free the sampling buffer and psb
4320 */
4321 while (psb) {
4322 DBprintk(("[%d] freeing smpl @%p size %ld\n", current->pid, psb->psb_hdr, psb->psb_size));
4323
4324 pfm_rvfree(psb->psb_hdr, psb->psb_size);
4325 tmp = psb->psb_next;
4326 kfree(psb);
4327 psb = tmp;
4328 }
4329
4330 /* just in case */
4331 task->thread.pfm_smpl_buf_list = NULL;
4332
4333 return 0;
4334 }
4335
4336 /*
4337 * function invoked from release_thread to make sure that the ctx_owner field does not
4338  * point to a nonexistent task.
4339 */
4340 void
4341 pfm_cleanup_owners(struct task_struct *task)
4342 {
4343 struct task_struct *p;
4344 pfm_context_t *ctx;
4345
4346 DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid));
4347
4348 read_lock(&tasklist_lock);
4349
4350 for_each_task(p) {
4351 /*
4352 * It is safe to do the 2-step test here, because thread.ctx
4353 * is cleaned up only in release_thread() and at that point
4354 * the task has been detached from the tasklist which is an
4355 * operation which uses the write_lock() on the tasklist_lock
4356 * so it cannot run concurrently to this loop. So we have the
4357 * guarantee that if we find p and it has a perfmon ctx then
4358 * it is going to stay like this for the entire execution of this
4359 * loop.
4360 */
4361 ctx = p->thread.pfm_context;
4362
4363 //DBprintk(("[%d] scanning task [%d] ctx=%p\n", task->pid, p->pid, ctx));
4364
4365 if (ctx && ctx->ctx_owner == task) {
4366 DBprintk(("trying for owner [%d] in [%d]\n", task->pid, p->pid));
4367 /*
4368 * the spinlock is required to take care of a race condition
4369 * with the send_sig_info() call. We must make sure that
4370 * either the send_sig_info() completes using a valid task,
4371 * or the notify_task is cleared before the send_sig_info()
4372 * can pick up a stale value. Note that by the time this
4373 * function is executed the 'task' is already detached from the
4374 * tasklist. The problem is that the notifiers have a direct
4375 * pointer to it. It is okay to send a signal to a task in this
4376 * stage, it simply will have no effect. But it is better than sending
4377 * to a completely destroyed task or worse to a new task using the same
4378 * task_struct address.
4379 */
4380 LOCK_CTX(ctx);
4381
4382 ctx->ctx_owner = NULL;
4383
4384 UNLOCK_CTX(ctx);
4385
4386 			DBprintk(("done for owner [%d] in [%d]\n", task->pid, p->pid));
4387 }
4388 }
4389 read_unlock(&tasklist_lock);
4390
4391 atomic_set(&task->thread.pfm_owners_check, 0);
4392 }
4393
4394
4395 /*
4396 * function called from release_thread to make sure that the ctx_notify_task is not pointing
4397  * to a nonexistent task
4398 */
4399 void
4400 pfm_cleanup_notifiers(struct task_struct *task)
4401 {
4402 struct task_struct *p;
4403 pfm_context_t *ctx;
4404
4405 DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid));
4406
4407 read_lock(&tasklist_lock);
4408
4409 for_each_task(p) {
4410 /*
4411 * It is safe to do the 2-step test here, because thread.ctx
4412 * is cleaned up only in release_thread() and at that point
4413 * the task has been detached from the tasklist which is an
4414 * operation which uses the write_lock() on the tasklist_lock
4415 * so it cannot run concurrently to this loop. So we have the
4416 * guarantee that if we find p and it has a perfmon ctx then
4417 * it is going to stay like this for the entire execution of this
4418 * loop.
4419 */
4420 ctx = p->thread.pfm_context;
4421
4422 //DBprintk(("[%d] scanning task [%d] ctx=%p\n", task->pid, p->pid, ctx));
4423
4424 if (ctx && ctx->ctx_notify_task == task) {
4425 DBprintk(("trying for notifier [%d] in [%d]\n", task->pid, p->pid));
4426 /*
4427 * the spinlock is required to take care of a race condition
4428 * with the send_sig_info() call. We must make sure that
4429 * either the send_sig_info() completes using a valid task,
4430 * or the notify_task is cleared before the send_sig_info()
4431 * can pick up a stale value. Note that by the time this
4432 * function is executed the 'task' is already detached from the
4433 * tasklist. The problem is that the notifiers have a direct
4434 * pointer to it. It is okay to send a signal to a task in this
4435 * stage, it simply will have no effect. But it is better than sending
4436 * to a completely destroyed task or worse to a new task using the same
4437 * task_struct address.
4438 */
4439 LOCK_CTX(ctx);
4440
4441 ctx->ctx_notify_task = NULL;
4442
4443 UNLOCK_CTX(ctx);
4444
4445 DBprintk(("done for notifier [%d] in [%d]\n", task->pid, p->pid));
4446 }
4447 }
4448 read_unlock(&tasklist_lock);
4449
4450 atomic_set(&task->thread.pfm_notifiers_check, 0);
4451 }
4452
4453 static struct irqaction perfmon_irqaction = {
4454 .handler = pfm_interrupt_handler,
4455 .flags = SA_INTERRUPT,
4456 .name = "perfmon"
4457 };
4458
4459 int
4460 pfm_install_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl)
4461 {
4462 int ret;
4463
4464 /* some sanity checks */
4465 if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
4466
4467 /* do the easy test first */
4468 if (pfm_alternate_intr_handler) return -EBUSY;
4469
4470 /* reserve our session */
4471 ret = pfm_reserve_session(NULL, 1, cpu_online_map);
4472 if (ret) return ret;
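	/*
	 * Reserving a system-wide session on every online CPU is what keeps
	 * regular perfmon contexts from owning the PMU while the alternate
	 * handler is attached to the perfmon interrupt.
	 */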
4473
4474 if (pfm_alternate_intr_handler) {
4475 printk(KERN_DEBUG "perfmon: install_alternate, intr_handler not NULL "
4476 "after reserve\n");
4477 return -EINVAL;
4478 }
4479
4480 pfm_alternate_intr_handler = hdl;
4481
4482 return 0;
4483 }
4484
4485 int
4486 pfm_remove_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl)
4487 {
4488 if (hdl == NULL) return -EINVAL;
4489
4490 /* cannot remove someone else's handler! */
4491 if (pfm_alternate_intr_handler != hdl) return -EINVAL;
4492
4493 pfm_alternate_intr_handler = NULL;
4494
4495 /*
4496 * XXX: assume cpu_online_map has not changed since reservation
4497 */
4498 pfm_unreserve_session(NULL, 1, cpu_online_map);
4499
4500 return 0;
4501 }
4502
4503 static struct file_operations pfm_proc_fops = {
4504 .open = pfm_proc_open,
4505 .read = seq_read,
4506 .llseek = seq_lseek,
4507 .release = seq_release,
4508 };
4509
4510 /*
4511 * perfmon initialization routine, called from the initcall() table
4512 */
4513 int __init
4514 pfm_init(void)
4515 {
4516 unsigned int n, n_counters, i;
4517
4518 pmu_conf.disabled = 1;
4519
4520 printk(KERN_INFO "perfmon: version %u.%u IRQ %u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN,
4521 IA64_PERFMON_VECTOR);
4522
4523 /*
4524 * compute the number of implemented PMD/PMC from the
4525 * description tables
4526 */
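	/*
	 * impl_pmcs/impl_pmds are arrays of 64-bit words: register i maps to
	 * bit (i & 63) of word (i >> 6), e.g. register 70 would land in bit 6
	 * of word 1.
	 */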
4527 n = 0;
4528 for (i=0; PMC_IS_LAST(i) == 0; i++) {
4529 if (PMC_IS_IMPL(i) == 0) continue;
4530 pmu_conf.impl_pmcs[i>>6] |= 1UL << (i&63);
4531 n++;
4532 }
4533 pmu_conf.num_pmcs = n;
4534
4535 n = 0; n_counters = 0;
4536 for (i=0; PMD_IS_LAST(i) == 0; i++) {
4537 if (PMD_IS_IMPL(i) == 0) continue;
4538 pmu_conf.impl_pmds[i>>6] |= 1UL << (i&63);
4539 n++;
4540 if (PMD_IS_COUNTING(i)) n_counters++;
4541 }
4542 pmu_conf.num_pmds = n;
4543 pmu_conf.num_counters = n_counters;
4544
4545 printk(KERN_INFO "perfmon: %u PMCs, %u PMDs, %u counters (%lu bits)\n",
4546 pmu_conf.num_pmcs,
4547 pmu_conf.num_pmds,
4548 pmu_conf.num_counters,
4549 ffz(pmu_conf.ovfl_val));
4550
4551 /* sanity check */
4552 if (pmu_conf.num_pmds >= IA64_NUM_PMD_REGS || pmu_conf.num_pmcs >= IA64_NUM_PMC_REGS) {
4553 printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
4554 return -1;
4555 }
4556
4557 /*
4558 * for now here for debug purposes
4559 */
4560 perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
4561 if (perfmon_dir == NULL) {
4562 printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
4563 return -1;
4564 }
4565 /*
4566 * install customized file operations for /proc/perfmon entry
4567 */
4568 perfmon_dir->proc_fops = &pfm_proc_fops;
4569
4570 /*
4571 * create /proc/sys/kernel/perfmon
4572 */
4573 pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
4574
4575 /*
4576 * initialize all our spinlocks
4577 */
4578 spin_lock_init(&pfm_sessions.pfs_lock);
4579
4580 /* we are all set */
4581 pmu_conf.disabled = 0;
4582
4583 return 0;
4584 }
4585
4586 __initcall(pfm_init);
4587
4588 void
4589 pfm_init_percpu(void)
4590 {
4591 int i;
4592
4593 if (smp_processor_id() == 0)
4594 register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
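	/*
	 * The irqaction only needs to be registered once (done on the boot CPU
	 * above); the perfmon vector and the default register values below,
	 * however, are per-CPU state and must be set up on every processor.
	 */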
4595
4596 ia64_set_pmv(IA64_PERFMON_VECTOR);
4597 ia64_srlz_d();
4598
4599 /*
4600 * we first initialize the PMU to a stable state.
4601 * the values may have been changed from their power-up
4602 * values by software executed before the kernel took over.
4603 *
4604 * At this point, pmu_conf has not yet been initialized
4605 *
4606 * On McKinley, this code is ineffective until PMC4 is initialized.
4607 */
4608 for (i=1; PMC_IS_LAST(i) == 0; i++) {
4609 if (PMC_IS_IMPL(i) == 0) continue;
4610 ia64_set_pmc(i, PMC_DFL_VAL(i));
4611 }
4612
4613 	for (i=0; PMD_IS_LAST(i) == 0; i++) {
4614 if (PMD_IS_IMPL(i) == 0) continue;
4615 ia64_set_pmd(i, 0UL);
4616 }
4617 pfm_freeze_pmu();
4618 }
4619
4620 #else /* !CONFIG_PERFMON */
4621
4622 asmlinkage long
4623 sys_perfmonctl (int pid, int cmd, void *req, int count, long arg5, long arg6,
4624 long arg7, long arg8, long stack)
4625 {
4626 return -ENOSYS;
4627 }
4628
4629 #endif /* !CONFIG_PERFMON */
4630