1 /*
2  * This file implements the perfmon subsystem which is used
3  * to program the IA-64 Performance Monitoring Unit (PMU).
4  *
5  * Originally written by Ganesh Venkitachalam, IBM Corp.
6  * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
7  *
8  * Modifications by Stephane Eranian, Hewlett-Packard Co.
9  * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
10  *
11  * Copyright (C) 1999-2003  Hewlett Packard Co
12  *               Stephane Eranian <eranian@hpl.hp.com>
13  *               David Mosberger-Tang <davidm@hpl.hp.com>
14  */
15 
16 #include <linux/config.h>
17 #include <linux/kernel.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/smp_lock.h>
21 #include <linux/proc_fs.h>
22 #include <linux/init.h>
23 #include <linux/vmalloc.h>
24 #include <linux/wrapper.h>
25 #include <linux/mm.h>
26 #include <linux/sysctl.h>
27 #include <linux/smp.h>
28 #include <linux/seq_file.h>
29 
30 #include <asm/bitops.h>
31 #include <asm/errno.h>
32 #include <asm/page.h>
33 #include <asm/perfmon.h>
34 #include <asm/processor.h>
35 #include <asm/signal.h>
36 #include <asm/system.h>
37 #include <asm/uaccess.h>
38 #include <asm/delay.h> /* for ia64_get_itc() */
39 
40 #ifdef CONFIG_PERFMON
41 
42 /*
43  * For PMUs which rely on the debug registers for some features, you must
44  * enable the following flag to activate support for accessing the
45  * registers via the perfmonctl() interface.
46  */
47 #if defined(CONFIG_ITANIUM) || defined(CONFIG_MCKINLEY)
48 #define PFM_PMU_USES_DBR	1
49 #endif
50 
51 /*
52  * perfmon context states
53  */
54 #define PFM_CTX_DISABLED	0
55 #define PFM_CTX_ENABLED		1
56 
57 /*
58  * Reset register flags
59  */
60 #define PFM_PMD_LONG_RESET	1
61 #define PFM_PMD_SHORT_RESET	2
62 
63 /*
64  * Misc macros and definitions
65  */
66 #define PMU_FIRST_COUNTER	4
67 #define PMU_MAX_PMCS		256
68 #define PMU_MAX_PMDS		256
69 
70 /*
71  * type of a PMU register (bitmask).
72  * bitmask structure:
73  * 	bit0   : register implemented
74  * 	bit1   : end marker
75  * 	bit2-3 : reserved
76  * 	bit4-7 : register type
77  * 	bit8-31: reserved
78  */
79 #define PFM_REG_IMPL		0x1 /* register implemented */
80 #define PFM_REG_END		0x2 /* end marker */
81 #define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
82 #define PFM_REG_COUNTING	(0x2<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm AND pmc.oi, a PMD used as a counter */
83 #define PFM_REG_CONTROL		(0x3<<4|PFM_REG_IMPL) /* PMU control register */
84 #define	PFM_REG_CONFIG		(0x4<<4|PFM_REG_IMPL) /* refine configuration */
85 #define PFM_REG_BUFFER	 	(0x5<<4|PFM_REG_IMPL) /* PMD used as buffer */
86 
87 #define PMC_IS_LAST(i)	(pmu_conf.pmc_desc[i].type & PFM_REG_END)
88 #define PMD_IS_LAST(i)	(pmu_conf.pmd_desc[i].type & PFM_REG_END)
89 
90 #define PFM_IS_DISABLED() pmu_conf.disabled
91 
92 #define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_soft_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)
93 #define PFM_FL_INHERIT_MASK	(PFM_FL_INHERIT_NONE|PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)
94 
95 /* i is assumed to be unsigned */
96 #define PMC_IS_IMPL(i)	  (i< PMU_MAX_PMCS && (pmu_conf.pmc_desc[i].type & PFM_REG_IMPL))
97 #define PMD_IS_IMPL(i)	  (i< PMU_MAX_PMDS && (pmu_conf.pmd_desc[i].type & PFM_REG_IMPL))
98 
99 /* XXX: these three assume that register i is implemented */
100 #define PMD_IS_COUNTING(i) (pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING)
101 #define PMC_IS_COUNTING(i) (pmu_conf.pmc_desc[i].type == PFM_REG_COUNTING)
102 #define PMC_IS_MONITOR(i)  (pmu_conf.pmc_desc[i].type == PFM_REG_MONITOR)
103 #define PMC_DFL_VAL(i)     pmu_conf.pmc_desc[i].default_value
104 #define PMC_RSVD_MASK(i)   pmu_conf.pmc_desc[i].reserved_mask
105 #define PMD_PMD_DEP(i)	   pmu_conf.pmd_desc[i].dep_pmd[0]
106 #define PMC_PMD_DEP(i)	   pmu_conf.pmc_desc[i].dep_pmd[0]
107 
108 /* k is assumed to be unsigned */
109 #define IBR_IS_IMPL(k)	  (k<pmu_conf.num_ibrs)
110 #define DBR_IS_IMPL(k)	  (k<pmu_conf.num_dbrs)
111 
112 #define CTX_IS_ENABLED(c) 	((c)->ctx_flags.state == PFM_CTX_ENABLED)
113 #define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
114 #define CTX_INHERIT_MODE(c)	((c)->ctx_fl_inherit)
115 #define CTX_HAS_SMPL(c)		((c)->ctx_psb != NULL)
116 /* XXX: does not support more than 64 PMDs */
117 #define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
118 #define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
119 
120 
121 #define CTX_USED_IBR(ctx,n) 	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
122 #define CTX_USED_DBR(ctx,n) 	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
123 #define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
124 
125 #ifdef CONFIG_SMP
126 #define GET_ACTIVATION()	pmu_owners[smp_processor_id()].activation_number
127 #define INC_ACTIVATION()	pmu_owners[smp_processor_id()].activation_number++
128 #define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()
129 #define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
130 #define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
131 #else /* !CONFIG_SMP */
132 #define SET_ACTIVATION(t)	do {} while(0)
133 #define GET_ACTIVATION(t)	do {} while(0)
134 #define INC_ACTIVATION(t)	do {} while(0)
135 #define SET_LAST_CPU(ctx, v)	do {} while(0)
136 #define GET_LAST_CPU(ctx)	do {} while(0)
137 #endif /* CONFIG_SMP */
138 
139 
140 #define PFM_INVALID_ACTIVATION	(~0UL)
141 
142 #define SET_PMU_OWNER(t)    do { pmu_owners[smp_processor_id()].owner = (t); } while(0)
143 #define PMU_OWNER()	    pmu_owners[smp_processor_id()].owner
144 
145 #define LOCK_PFS()	    spin_lock(&pfm_sessions.pfs_lock)
146 #define UNLOCK_PFS()	    spin_unlock(&pfm_sessions.pfs_lock)
147 
148 #define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
149 
150 #define TASK_PTREGS(t) (((struct pt_regs *)((unsigned long) (t) + IA64_STK_OFFSET))-1)
151 
152 /*
153  * cmp0 must be the value of pmc0; bit 0 is the freeze bit, so any other set bit indicates a counter overflow
154  */
155 #define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)
156 
157 
158 /*
159  * debugging
160  */
161 #define DBprintk(a) \
162 	do { \
163 		if (pfm_sysctl.debug >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
164 	} while (0)
165 
166 #define DBprintk_ovfl(a) \
167 	do { \
168 		if (pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
169 	} while (0)
170 
171 
172 
173 /*
174  * Architected PMC structure
175  */
176 typedef struct {
177 	unsigned long pmc_plm:4;	/* privilege level mask */
178 	unsigned long pmc_ev:1;		/* external visibility */
179 	unsigned long pmc_oi:1;		/* overflow interrupt */
180 	unsigned long pmc_pm:1;		/* privileged monitor */
181 	unsigned long pmc_ig1:1;	/* reserved */
182 	unsigned long pmc_es:8;		/* event select */
183 	unsigned long pmc_ig2:48;	/* reserved */
184 } pfm_monitor_t;
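/*
 * This layout matches the architected bit layout of a counting/monitoring PMC,
 * so a raw PMC value can be overlaid with a pfm_monitor_t, e.g. to force
 * pmc_oi (overflow interrupt) as done in pfm_write_pmcs().
 */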
185 
186 /*
187  * There is one such data structure per perfmon context. It is used to describe the
188  * sampling buffer. It is to be shared among siblings whereas the pfm_context
189  * is not.
190  * Therefore we maintain a refcnt which is incremented on fork().
191  * This descriptor is private to the kernel; only the actual sampling buffer,
192  * including its header, is exposed to the user. This construct allows us to
193  * export the buffer read-write, if needed, without worrying about security
194  * problems.
195  */
196 typedef struct _pfm_smpl_buffer_desc {
197 	spinlock_t		psb_lock;	/* protection lock */
198 	unsigned long		psb_refcnt;	/* how many users for the buffer */
199 	int			psb_flags;	/* bitvector of flags (not yet used) */
200 
201 	void			*psb_addr;	/* points to location of first entry */
202 	unsigned long		psb_entries;	/* maximum number of entries */
203 	unsigned long		psb_size;	/* aligned size of buffer */
204 	unsigned long		psb_index;	/* next free entry slot XXX: must use the one in buffer */
205 	unsigned long		psb_entry_size;	/* size of each entry including entry header */
206 
207 	perfmon_smpl_hdr_t	*psb_hdr;	/* points to sampling buffer header */
208 
209 	struct _pfm_smpl_buffer_desc *psb_next;	/* next psb, used for rvfreeing of psb_hdr */
210 
211 } pfm_smpl_buffer_desc_t;
212 
213 /*
214  * psb_flags
215  */
216 #define PSB_HAS_VMA	0x1		/* a virtual mapping for the buffer exists */
217 
218 #define LOCK_PSB(p)	spin_lock(&(p)->psb_lock)
219 #define UNLOCK_PSB(p)	spin_unlock(&(p)->psb_lock)
220 
221 /*
222  * 64-bit software counter structure
223  */
224 typedef struct {
225 	u64 val;	/* virtual 64bit counter value */
226 	u64 lval;	/* last value */
227 	u64 long_reset;	/* reset value on sampling overflow */
228 	u64 short_reset;/* reset value on overflow */
229 	u64 reset_pmds[4]; /* which other pmds to reset when this counter overflows */
230 	u64 seed;	/* seed for random-number generator */
231 	u64 mask;	/* mask for random-number generator */
232 	unsigned int flags; /* notify/do not notify */
233 } pfm_counter_t;
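/*
 * The hardware PMDs implement fewer than 64 bits; pmu_conf.ovfl_val masks the
 * implemented part. The low bits live in the hardware register while the upper
 * bits accumulate in 'val', and the full 64-bit count is reconstructed as in
 * pfm_read_soft_counter().
 */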
234 
235 /*
236  * perfmon context. One per process, is cloned on fork() depending on
237  * inheritance flags
238  */
239 typedef struct {
240 	unsigned int state:1;		/* 0=disabled, 1=enabled */
241 	unsigned int inherit:2;		/* inherit mode */
242 	unsigned int block:1;		/* when 1, task will block on user notifications */
243 	unsigned int system:1;		/* do system wide monitoring */
244 	unsigned int frozen:1;		/* pmu must be kept frozen on ctxsw in */
245 	unsigned int protected:1;	/* allow access to creator of context only */
246 	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
247 	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
248 	unsigned int unsecure:1;	/* sp = 0 for non self-monitored task */
249 	unsigned int reserved:22;
250 } pfm_context_flags_t;
251 
252 /*
253  * perfmon context: encapsulates all the state of a monitoring session
254  * XXX: probably need to change layout
255  */
256 typedef struct pfm_context {
257 	pfm_smpl_buffer_desc_t	*ctx_psb;		/* sampling buffer, if any */
258 	unsigned long		ctx_smpl_vaddr;		/* user level virtual address of smpl buffer */
259 
260 	spinlock_t		ctx_lock;
261 	pfm_context_flags_t	ctx_flags;		/* block/noblock */
262 
263 	struct task_struct	*ctx_notify_task;	/* who to notify on overflow */
264 	struct task_struct	*ctx_owner;		/* pid of creator (debug) */
265 
266 	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */
267 	unsigned long		ctx_smpl_regs[4];	/* which registers to record on overflow */
268 
269 	struct semaphore	ctx_restart_sem;   	/* use for blocking notification mode */
270 
271 	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used                 */
272 	unsigned long		ctx_reload_pmds[4];	/* bitmask of PMD to reload on ctxsw   */
273 
274 	unsigned long		ctx_used_pmcs[4];	/* bitmask PMC used by context         */
275 	unsigned long		ctx_reload_pmcs[4];	/* bitmask of PMC to reload on ctxsw   */
276 
277 	unsigned long		ctx_used_ibrs[4];	/* bitmask of used IBR (speedup ctxsw) */
278 	unsigned long		ctx_used_dbrs[4];	/* bitmask of used DBR (speedup ctxsw) */
279 
280 	pfm_counter_t		ctx_soft_pmds[IA64_NUM_PMD_REGS]; /* XXX: size should be dynamic */
281 
282 	u64			ctx_saved_psr;		/* copy of psr used for lazy ctxsw */
283 	unsigned long		ctx_saved_cpus_allowed;	/* copy of the task cpus_allowed (system wide) */
284 	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
285 	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
286 	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */
287 
288 	struct tasklet_struct   ctx_tasklet;		/* used for sending signal-based notifications */
289 } pfm_context_t;
290 
291 #define PFM_GET_CTX(t)	((pfm_context_t *)(t)->thread.pfm_context)
292 #define LOCK_CTX(ctx)	spin_lock(&(ctx)->ctx_lock)
293 #define UNLOCK_CTX(ctx)	spin_unlock(&(ctx)->ctx_lock)
294 
295 #define ctx_fl_inherit		ctx_flags.inherit
296 #define ctx_fl_block		ctx_flags.block
297 #define ctx_fl_system		ctx_flags.system
298 #define ctx_fl_frozen		ctx_flags.frozen
299 #define ctx_fl_protected	ctx_flags.protected
300 #define ctx_fl_using_dbreg	ctx_flags.using_dbreg
301 #define ctx_fl_excl_idle	ctx_flags.excl_idle
302 #define ctx_fl_unsecure		ctx_flags.unsecure
303 
304 /*
305  * global information about all sessions
306  * mostly used to synchronize between system wide and per-process
307  */
308 typedef struct {
309 	spinlock_t		pfs_lock;		   /* lock the structure */
310 
311 	unsigned int 		pfs_task_sessions;	   /* number of per task sessions */
312 	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
313 	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
314 	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
315 	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
316 } pfm_session_t;
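/*
 * pfs_sys_use_dbregs and pfs_ptrace_use_dbregs count how many sessions use the
 * debug registers through perfmon (range restrictions) versus through ptrace,
 * so that conflicting uses of the debug registers can be detected and refused.
 */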
317 
318 /*
319  * information about a PMC or PMD.
320  * dep_pmd[]: a bitmask of dependent PMD registers
321  * dep_pmc[]: a bitmask of dependent PMC registers
322  */
323 typedef struct {
324 	unsigned int		type;
325 	int			pm_pos;
326 	unsigned long		default_value;	/* power-on default value */
327 	unsigned long		reserved_mask;	/* bitmask of reserved bits */
328 	int			(*read_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
329 	int			(*write_check)(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
330 	unsigned long		dep_pmd[4];
331 	unsigned long		dep_pmc[4];
332 } pfm_reg_desc_t;
333 
334 /* assume cnum is a valid monitor */
335 #define PMC_PM(cnum, val)	(((val) >> (pmu_conf.pmc_desc[cnum].pm_pos)) & 0x1)
336 #define PMC_WR_FUNC(cnum)	(pmu_conf.pmc_desc[cnum].write_check)
337 #define PMD_WR_FUNC(cnum)	(pmu_conf.pmd_desc[cnum].write_check)
338 #define PMD_RD_FUNC(cnum)	(pmu_conf.pmd_desc[cnum].read_check)
339 
340 /*
341  * This structure is initialized at boot time and contains
342  * a description of the PMU main characteristics.
343  */
344 typedef struct {
345 	unsigned int  disabled;		/* indicates if perfmon is working properly */
346 	unsigned long ovfl_val;		/* overflow value for generic counters   */
347 	unsigned long impl_pmcs[4];	/* bitmask of implemented PMCS */
348 	unsigned long impl_pmds[4];	/* bitmask of implemented PMDS */
349 	unsigned int  num_pmcs;		/* number of implemented PMCS */
350 	unsigned int  num_pmds;		/* number of implemented PMDS */
351 	unsigned int  num_ibrs;		/* number of implemented IBRS */
352 	unsigned int  num_dbrs;		/* number of implemented DBRS */
353 	unsigned int  num_counters;	/* number of PMD/PMC counters */
354 	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
355 	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */
356 } pmu_config_t;
357 
358 /*
359  * perfmon command descriptions
360  */
361 typedef struct {
362 	int		(*cmd_func)(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
363 	int		cmd_flags;
364 	unsigned int	cmd_narg;
365 	size_t		cmd_argsize;
366 } pfm_cmd_desc_t;
367 
368 #define PFM_CMD_PID		0x1	/* command requires pid argument */
369 #define PFM_CMD_ARG_READ	0x2	/* command must read argument(s) */
370 #define PFM_CMD_ARG_RW		0x4	/* command must read/write argument(s) */
371 #define PFM_CMD_CTX		0x8	/* command needs a perfmon context */
372 #define PFM_CMD_NOCHK		0x10	/* command does not need to check task's state */
373 
374 #define PFM_CMD_IDX(cmd)	(cmd)
375 
376 #define PFM_CMD_IS_VALID(cmd)	((PFM_CMD_IDX(cmd) >= 0) && (PFM_CMD_IDX(cmd) < PFM_CMD_COUNT) \
377 				  && pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func != NULL)
378 
379 #define PFM_CMD_USE_PID(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_PID) != 0)
380 #define PFM_CMD_READ_ARG(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_READ) != 0)
381 #define PFM_CMD_RW_ARG(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_RW) != 0)
382 #define PFM_CMD_USE_CTX(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_CTX) != 0)
383 #define PFM_CMD_CHK(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_NOCHK) == 0)
384 
385 #define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
386 #define PFM_CMD_NARG(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_narg)
387 #define PFM_CMD_ARG_SIZE(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_argsize)
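/*
 * The macros above decode the pfm_cmd_tab[] entry for a given perfmonctl()
 * command; the command number itself is used as the table index (PFM_CMD_IDX).
 */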
388 
389 typedef struct {
390 	int	debug;		/* turn on/off debugging via syslog */
391 	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
392 	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
393 } pfm_sysctl_t;
394 
395 typedef struct {
396 	unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
397 	unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
398 	unsigned long pfm_recorded_samples_count;
399 	unsigned long pfm_full_smpl_buffer_count; /* how many times the sampling buffer was full */
400 	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
401 } pfm_stats_t;
402 
403 /*
404  * perfmon internal variables
405  */
406 static pfm_session_t	pfm_sessions;	/* global sessions information */
407 static struct proc_dir_entry *perfmon_dir; /* for debug only */
408 static pfm_stats_t	pfm_stats[NR_CPUS];
409 static pfm_intr_handler_desc_t	*pfm_alternate_intr_handler;
410 
411 /* sysctl() controls */
412 static pfm_sysctl_t pfm_sysctl;
413 
414 static ctl_table pfm_ctl_table[]={
415 	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
416 	{2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
417 	{3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
418 	{ 0, },
419 };
420 static ctl_table pfm_sysctl_dir[] = {
421 	{1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
422  	{0,},
423 };
424 static ctl_table pfm_sysctl_root[] = {
425 	{1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
426  	{0,},
427 };
428 static struct ctl_table_header *pfm_sysctl_header;
429 
430 static void pfm_vm_close(struct vm_area_struct * area);
431 
432 static struct vm_operations_struct pfm_vm_ops={
433 	.close =  pfm_vm_close
434 };
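/*
 * Only a close() callback is needed: the sampling buffer is fully mapped up
 * front by pfm_remap_buffer(), so no fault handler is required, and
 * pfm_vm_close() is how we learn that the user mapping has gone away.
 */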
435 
436 /*
437  * keep track of task owning the PMU per CPU.
438  */
439 static struct {
440 	struct task_struct *owner;
441 	unsigned long	   activation_number;
442 	char 		   pad[SMP_CACHE_BYTES] ____cacheline_aligned;
443 } pmu_owners[NR_CPUS];
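/*
 * activation_number is bumped (INC_ACTIVATION) each time a context is installed
 * on this CPU. Together with ctx_last_activation/ctx_last_cpu it lets the
 * context-switch path recognize that the PMU still holds a context's state and
 * avoid a full register reload (lazy save/restore).
 */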
444 
445 
446 
447 /*
448  * forward declarations
449  */
450 static void pfm_reset_pmu(struct task_struct *);
451 #ifndef CONFIG_SMP
452 static unsigned long pfm_lazy_save_regs (struct task_struct *ta);
453 #endif
454 
455 #if   defined(CONFIG_ITANIUM)
456 #include "perfmon_itanium.h"
457 #elif defined(CONFIG_MCKINLEY)
458 #include "perfmon_mckinley.h"
459 #else
460 #include "perfmon_generic.h"
461 #endif
462 
463 static inline void
464 pfm_clear_psr_pp(void)
465 {
466 	__asm__ __volatile__ ("rsm psr.pp;; srlz.i;;"::: "memory");
467 }
468 
469 static inline void
470 pfm_set_psr_pp(void)
471 {
472 	__asm__ __volatile__ ("ssm psr.pp;; srlz.i;;"::: "memory");
473 }
474 
475 static inline void
476 pfm_clear_psr_up(void)
477 {
478 	__asm__ __volatile__ ("rsm psr.up;; srlz.i;;"::: "memory");
479 }
480 
481 static inline void
482 pfm_set_psr_up(void)
483 {
484 	__asm__ __volatile__ ("ssm psr.up;; srlz.i;;"::: "memory");
485 }
486 
487 static inline unsigned long
488 pfm_get_psr(void)
489 {
490 	unsigned long tmp;
491 	__asm__ __volatile__ ("mov %0=psr;;": "=r"(tmp) :: "memory");
492 	return tmp;
493 }
494 
495 static inline void
496 pfm_set_psr_l(unsigned long val)
497 {
498 	__asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(val): "memory");
499 }
500 
501 
502 
503 static inline void
504 pfm_freeze_pmu(void)
505 {
506 	ia64_set_pmc(0,1UL);
507 	ia64_srlz_d();
508 }
509 
510 static inline void
511 pfm_unfreeze_pmu(void)
512 {
513 	ia64_set_pmc(0,0UL);
514 	ia64_srlz_d();
515 }
516 
517 static inline void
518 pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
519 {
520 	int i;
521 
522 	for (i=0; i < nibrs; i++) {
523 		ia64_set_ibr(i, ibrs[i]);
524 	}
525 	ia64_srlz_i();
526 }
527 
528 static inline void
529 pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
530 {
531 	int i;
532 
533 	for (i=0; i < ndbrs; i++) {
534 		ia64_set_dbr(i, dbrs[i]);
535 	}
536 	ia64_srlz_d();
537 }
538 
539 static inline void
540 pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
541 {
542 	int i;
543 
544 	DBprintk(("mask=0x%lx\n", mask));
545 	for (i=0; mask; i++, mask>>=1) {
546 		if ((mask & 0x1) == 0) continue;
547 		ia64_set_pmc(i, pmcs[i]);
548 		DBprintk(("pmc[%d]=0x%lx\n", i, pmcs[i]));
549 	}
550 	ia64_srlz_d();
551 }
552 
553 static inline void
554 pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
555 {
556 	int i;
557 	unsigned long val, ovfl_val = pmu_conf.ovfl_val;
558 
559 	DBprintk(("mask=0x%lx\n", mask));
560 	for (i=0; mask; i++, mask>>=1) {
561 		if ((mask & 0x1) == 0) continue;
562 		val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
563 		ia64_set_pmd(i, val);
564 		DBprintk(("pmd[%d]=0x%lx\n", i, val));
565 	}
566 	ia64_srlz_d();
567 }
568 
569 static inline void
570 pfm_save_pmds(unsigned long *pmds, unsigned long mask)
571 {
572 	int i;
573 
574 	ia64_srlz_d();
575 
576 	for (i=0; mask; i++, mask>>=1) {
577 		if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
578 	}
579 }
580 
581 static inline unsigned long
582 pfm_read_soft_counter(pfm_context_t *ctx, int i)
583 {
584 	return ctx->ctx_soft_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.ovfl_val);
585 }
586 
587 static inline void
588 pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
589 {
590 	ctx->ctx_soft_pmds[i].val = val  & ~pmu_conf.ovfl_val;
591 	/*
592 	 * writes to the unimplemented part are ignored, so we do not need to
593 	 * mask off the top part
594 	 */
595 	ia64_set_pmd(i, val & pmu_conf.ovfl_val);
596 }
597 
598 /*
599  * Generates a unique (per CPU) timestamp
600  */
601 static inline unsigned long
602 pfm_get_stamp(void)
603 {
604 	/*
605 	 * XXX: must find something more efficient
606 	 */
607 	return ia64_get_itc();
608 }
609 
610 /* Here we want the physical address of the memory.
611  * This is used when initializing the contents of the
612  * area and marking the pages as reserved.
613  */
614 static inline unsigned long
615 pfm_kvirt_to_pa(unsigned long adr)
616 {
617 	__u64 pa = ia64_tpa(adr);
618 	//DBprintk(("kv2pa(%lx-->%lx)\n", adr, pa));
619 	return pa;
620 }
621 
622 static void *
623 pfm_rvmalloc(unsigned long size)
624 {
625 	void *mem;
626 	unsigned long adr, page;
627 
628 	mem=vmalloc(size);
629 	if (mem) {
630 		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
631 		memset(mem, 0, size); /* Clear the ram out, no junk to the user */
632 		adr=(unsigned long) mem;
633 		while (size > 0) {
634 			page = pfm_kvirt_to_pa(adr);
635 			mem_map_reserve(virt_to_page(__va(page)));
636 			adr  += PAGE_SIZE;
637 			size -= PAGE_SIZE;
638 		}
639 	}
640 	return mem;
641 }
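/*
 * pfm_rvmalloc()/pfm_rvfree() follow the usual pattern for memory that is later
 * remapped into user space: the buffer is vmalloc'ed and every underlying page
 * is marked reserved so that remap_page_range(), used in pfm_remap_buffer(),
 * accepts it and the pages are never candidates for swapping.
 */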
642 
643 static void
644 pfm_rvfree(void *mem, unsigned long size)
645 {
646 	unsigned long adr, page = 0;
647 
648 	if (mem) {
649 		adr=(unsigned long) mem;
650 		while (size > 0) {
651 			page = pfm_kvirt_to_pa(adr);
652 			mem_map_unreserve(virt_to_page(__va(page)));
653 			adr+=PAGE_SIZE;
654 			size-=PAGE_SIZE;
655 		}
656 		vfree(mem);
657 	}
658 	return;
659 }
660 
661 /*
662  * This function gets called from mm/mmap.c:exit_mmap() only when there is a sampling buffer
663  * attached to the context AND the current task has a mapping for it, i.e., it is the original
664  * creator of the context.
665  *
666  * This function is used to remember the fact that the vma describing the sampling buffer
667  * has now been removed. It can only be called when no other tasks share the same mm context.
668  *
669  */
670 static void
671 pfm_vm_close(struct vm_area_struct *vma)
672 {
673 	pfm_smpl_buffer_desc_t *psb = (pfm_smpl_buffer_desc_t *)vma->vm_private_data;
674 
675 	if (psb == NULL) {
676 		printk(KERN_DEBUG "perfmon: psb is null in [%d]\n", current->pid);
677 		return;
678 	}
679 	/*
680 	 * Add PSB to list of buffers to free on release_thread() when no more users
681 	 *
682 	 * This call is safe because, once the count is zero, it cannot be modified anymore.
683 	 * The fact that there are no more users of the mm context does not mean that the
684 	 * sampling buffer is no longer in use outside of this task. In fact, it can still
685 	 * be accessed from within the kernel by another task (such as the monitored task).
686 	 *
687 	 * Therefore, we only move the psb into the list of buffers to free when we know
688 	 * nobody else is using it.
689 	 * The linked list is independent of the perfmon context, because in the case of
690 	 * multi-threaded processes, the last thread may not have been involved with
691 	 * monitoring; however, it will be the one removing the vma and it should therefore
692 	 * also remove the sampling buffer. This buffer cannot be removed until the vma
693 	 * is removed.
694 	 *
695 	 * This function cannot remove the buffer from here, because exit_mmap() must first
696 	 * complete. Given that there is no other vma related callback in the generic code,
697 	 * we have created our own with the linked list of sampling buffers to free. The list
698 	 * is part of the thread structure. In release_thread() we check if the list is
699 	 * empty. If not we call into perfmon to free the buffer and psb. That is the only
700 	 * way to ensure a safe deallocation of the sampling buffer which works when
701 	 * the buffer is shared between distinct processes or with multi-threaded programs.
702 	 *
703 	 * We need to lock the psb because the refcnt test and flag manipulation must
704 	 * look like an atomic operation with respect to pfm_context_exit().
705 	 */
706 	LOCK_PSB(psb);
707 
708 	if (psb->psb_refcnt == 0) {
709 
710 		psb->psb_next = current->thread.pfm_smpl_buf_list;
711 		current->thread.pfm_smpl_buf_list = psb;
712 
713 		DBprintk(("[%d] add smpl @%p size %lu to smpl_buf_list psb_flags=0x%x\n",
714 			current->pid, psb->psb_hdr, psb->psb_size, psb->psb_flags));
715 	}
716 	DBprintk(("[%d] clearing psb_flags=0x%x smpl @%p size %lu\n",
717 			current->pid, psb->psb_flags, psb->psb_hdr, psb->psb_size));
718 	/*
719 	 * the vma mapping for the buffer is being removed, so clear the flag
720 	 */
721 	psb->psb_flags &= ~PSB_HAS_VMA;
722 
723 	UNLOCK_PSB(psb);
724 }
725 
726 /*
727  * This function is called from pfm_destroy_context() and also from pfm_inherit()
728  * to explicitly remove the sampling buffer mapping from the user level address space.
729  */
730 static int
731 pfm_remove_smpl_mapping(struct task_struct *task)
732 {
733 	pfm_context_t *ctx = task->thread.pfm_context;
734 	pfm_smpl_buffer_desc_t *psb;
735 	int r;
736 
737 	/*
738 	 * some sanity checks first
739 	 */
740 	if (ctx == NULL || task->mm == NULL || ctx->ctx_smpl_vaddr == 0 || ctx->ctx_psb == NULL) {
741 		printk(KERN_DEBUG "perfmon: invalid context mm=%p\n", task->mm);
742 		return -1;
743 	}
744 	psb = ctx->ctx_psb;
745 
746 	down_write(&task->mm->mmap_sem);
747 
748 	r = do_munmap(task->mm, ctx->ctx_smpl_vaddr, psb->psb_size);
749 
750 	up_write(&task->mm->mmap_sem);
751 	if (r !=0) {
752 		printk(KERN_DEBUG "perfmon: pid %d unable to unmap sampling buffer "
753 		       "@0x%lx size=%ld\n", task->pid, ctx->ctx_smpl_vaddr, psb->psb_size);
754 	}
755 
756 	DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d refcnt=%lu psb_flags=0x%x\n",
757 		task->pid, ctx->ctx_smpl_vaddr, psb->psb_size, r, psb->psb_refcnt, psb->psb_flags));
758 
759 	return 0;
760 }
761 
762 static pfm_context_t *
763 pfm_context_alloc(void)
764 {
765 	pfm_context_t *ctx;
766 
767 	/* allocate context descriptor */
768 	ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
769 	if (ctx) memset(ctx, 0, sizeof(pfm_context_t));
770 
771 	return ctx;
772 }
773 
774 static void
775 pfm_context_free(pfm_context_t *ctx)
776 {
777 	if (ctx) {
778 		DBprintk(("kill tasklet for ctx %p\n", ctx));
779 
780 		tasklet_kill(&ctx->ctx_tasklet);
781 
782 		DBprintk(("free ctx @%p\n", ctx));
783 		kfree(ctx);
784 	}
785 }
786 
787 static int
788 pfm_remap_buffer(unsigned long buf, unsigned long addr, unsigned long size)
789 {
790 	unsigned long page;
791 
792 	DBprintk(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
793 
794 	while (size > 0) {
795 		page = pfm_kvirt_to_pa(buf);
796 
797 		if (remap_page_range(addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM;
798 
799 		addr  += PAGE_SIZE;
800 		buf   += PAGE_SIZE;
801 		size  -= PAGE_SIZE;
802 	}
803 	return 0;
804 }
805 
806 /*
807  * counts the number of PMDs to save per entry.
808  * This code is generic enough to accommodate more than 64 PMDs when they become available
809  */
810 static unsigned long
811 pfm_smpl_entry_size(unsigned long *which, unsigned long size)
812 {
813 	unsigned long res = 0;
814 	int i;
815 
816 	for (i=0; i < size; i++, which++) res += hweight64(*which);
817 
818 	DBprintk(("weight=%ld\n", res));
819 
820 	return res;
821 }
822 
823 /*
824  * Allocates the sampling buffer and remaps it into caller's address space
825  */
826 static int
827 pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned long entries,
828 		      void **user_vaddr)
829 {
830 	struct mm_struct *mm = current->mm;
831 	struct vm_area_struct *vma = NULL;
832 	unsigned long size, regcount;
833 	void *smpl_buf;
834 	pfm_smpl_buffer_desc_t *psb;
835 
836 
837 	/* note that regcount might be 0; in this case only the header for each
838 	 * entry will be recorded.
839 	 */
840 	regcount = pfm_smpl_entry_size(which_pmds, 1);
841 
842 	if ((sizeof(perfmon_smpl_hdr_t)+ entries*sizeof(perfmon_smpl_entry_t)) <= entries) {
843 		DBprintk(("requested entries %lu is too big\n", entries));
844 		return -EINVAL;
845 	}
846 
847 	/*
848 	 * 1 buffer hdr and for each entry a header + regcount PMDs to save
849 	 */
850 	size = PAGE_ALIGN(  sizeof(perfmon_smpl_hdr_t)
851 			  + entries * (sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64)));
852 
853 	DBprintk(("sampling buffer size=%lu bytes\n", size));
854 
855 	/*
856 	 * check requested size to avoid Denial-of-service attacks
857 	 * XXX: may have to refine this test
858 	 * Check against address space limit.
859 	 *
860 	 * if ((mm->total_vm << PAGE_SHIFT) + len> current->rlim[RLIMIT_AS].rlim_cur)
861 	 * 	return -ENOMEM;
862 	 */
863 	if (size > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN;
864 
865 	/*
866 	 * We do the easy to undo allocations first.
867  	 *
868 	 * pfm_rvmalloc() clears the buffer, so there is no leak
869 	 */
870 	smpl_buf = pfm_rvmalloc(size);
871 	if (smpl_buf == NULL) {
872 		DBprintk(("Can't allocate sampling buffer\n"));
873 		return -ENOMEM;
874 	}
875 
876 	DBprintk(("smpl_buf @%p\n", smpl_buf));
877 
878 	/* allocate sampling buffer descriptor now */
879 	psb = kmalloc(sizeof(*psb), GFP_KERNEL);
880 	if (psb == NULL) {
881 		DBprintk(("Can't allocate sampling buffer descriptor\n"));
882 		goto error_kmalloc;
883 	}
884 
885 	/* allocate vma */
886 	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
887 	if (!vma) {
888 		DBprintk(("Cannot allocate vma\n"));
889 		goto error_kmem;
890 	}
891 	memset(vma, 0, sizeof(*vma));
892 
893 	/*
894 	 * partially initialize the vma for the sampling buffer
895 	 *
896 	 * The VM_DONTCOPY flag is very important as it ensures that the mapping
897 	 * will never be inherited for any child process (via fork()) which is always
898 	 * what we want.
899 	 */
900 	vma->vm_mm	     = mm;
901 	vma->vm_flags	     = VM_READ| VM_MAYREAD |VM_RESERVED|VM_DONTCOPY;
902 	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
903 	vma->vm_ops	     = &pfm_vm_ops; /* necessary to get the close() callback */
904 	vma->vm_pgoff	     = 0;
905 	vma->vm_file	     = NULL;
906 	vma->vm_raend	     = 0;
907 	vma->vm_private_data = psb;	/* information needed by the pfm_vm_close() function */
908 
909 	/*
910 	 * Now we have everything we need and we can initialize
911 	 * and connect all the data structures
912 	 */
913 
914 	psb->psb_hdr	 = smpl_buf;
915 	psb->psb_addr    = ((char *)smpl_buf)+sizeof(perfmon_smpl_hdr_t); /* first entry */
916 	psb->psb_size    = size; /* aligned size */
917 	psb->psb_index   = 0;
918 	psb->psb_entries = entries;
919 	psb->psb_refcnt  = 1;
920 	psb->psb_flags   = PSB_HAS_VMA;
921 
922 	spin_lock_init(&psb->psb_lock);
923 
924 	/*
925 	 * XXX: will need to do cacheline alignment to avoid false sharing in SMP mode and
926 	 * multitask monitoring.
927 	 */
928 	psb->psb_entry_size = sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64);
929 
930 	DBprintk(("psb @%p entry_size=%ld hdr=%p addr=%p refcnt=%lu psb_flags=0x%x\n",
931 		  (void *)psb,psb->psb_entry_size, (void *)psb->psb_hdr,
932 		  (void *)psb->psb_addr, psb->psb_refcnt, psb->psb_flags));
933 
934 	/* initialize some of the fields of user visible buffer header */
935 	psb->psb_hdr->hdr_version    = PFM_SMPL_VERSION;
936 	psb->psb_hdr->hdr_entry_size = psb->psb_entry_size;
937 	psb->psb_hdr->hdr_pmds[0]    = which_pmds[0];
938 
939 	/*
940 	 * Let's do the difficult operations next.
941 	 *
942 	 * now we atomically find some area in the address space and
943 	 * remap the buffer in it.
944 	 */
945 	down_write(&current->mm->mmap_sem);
946 
947 
948 	/* find some free area in address space, must have mmap sem held */
949 	vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS);
950 	if (vma->vm_start == 0UL) {
951 		DBprintk(("Cannot find unmapped area for size %ld\n", size));
952 		up_write(&current->mm->mmap_sem);
953 		goto error;
954 	}
955 	vma->vm_end = vma->vm_start + size;
956 
957 	DBprintk(("entries=%ld aligned size=%ld, unmapped @0x%lx\n", entries, size, vma->vm_start));
958 
959 	/* can only be applied to current, need to have the mm semaphore held when called */
960 	if (pfm_remap_buffer((unsigned long)smpl_buf, vma->vm_start, size)) {
961 		DBprintk(("Can't remap buffer\n"));
962 		up_write(&current->mm->mmap_sem);
963 		goto error;
964 	}
965 
966 	/*
967 	 * now insert the vma in the vm list for the process, must be
968 	 * done with mmap lock held
969 	 */
970 	insert_vm_struct(mm, vma);
971 
972 	mm->total_vm  += size >> PAGE_SHIFT;
973 
974 	up_write(&current->mm->mmap_sem);
975 
976 	/* store which PMDS to record */
977 	ctx->ctx_smpl_regs[0] = which_pmds[0];
978 
979 
980 	/* link to perfmon context */
981 	ctx->ctx_psb        = psb;
982 
983 	/*
984 	 * keep track of user level virtual address
985 	 */
986 	ctx->ctx_smpl_vaddr = *(unsigned long *)user_vaddr = vma->vm_start;
987 
988 	return 0;
989 
990 error:
991 	kmem_cache_free(vm_area_cachep, vma);
992 error_kmem:
993 	kfree(psb);
994 error_kmalloc:
995 	pfm_rvfree(smpl_buf, size);
996 	return -ENOMEM;
997 }
998 
999 static int
1000 pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask)
1001 {
1002 	unsigned long m, undo_mask;
1003 	unsigned int n, i;
1004 
1005 	/*
1006 	 * validity checks on cpu_mask have been done upstream
1007 	 */
1008 	LOCK_PFS();
1009 
1010 	if (is_syswide) {
1011 		/*
1012 		 * cannot mix system wide and per-task sessions
1013 		 */
1014 		if (pfm_sessions.pfs_task_sessions > 0UL) {
1015 			DBprintk(("system wide not possible, %u conflicting task_sessions\n",
1016 			  	pfm_sessions.pfs_task_sessions));
1017 			goto abort;
1018 		}
1019 
1020 		m = cpu_mask; undo_mask = 0UL; n = 0;
1021 		DBprintk(("cpu_mask=0x%lx\n", cpu_mask));
1022 		for(i=0; m; i++, m>>=1) {
1023 
1024 			if ((m & 0x1) == 0UL) continue;
1025 
1026 			if (pfm_sessions.pfs_sys_session[i]) goto undo;
1027 
1028 			DBprintk(("reserving CPU%d currently on CPU%d\n", i, smp_processor_id()));
1029 
1030 			pfm_sessions.pfs_sys_session[i] = task;
1031 			undo_mask |= 1UL << i;
1032 			n++;
1033 		}
1034 		pfm_sessions.pfs_sys_sessions += n;
1035 	} else {
1036 		if (pfm_sessions.pfs_sys_sessions) goto abort;
1037 		pfm_sessions.pfs_task_sessions++;
1038 	}
1039 	UNLOCK_PFS();
1040 	return 0;
1041 undo:
1042 	DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n",
1043   		pfm_sessions.pfs_sys_session[i]->pid, i));
1044 
1045 	for(i=0; undo_mask; i++, undo_mask >>=1) {
1046 		pfm_sessions.pfs_sys_session[i] = NULL;
1047 	}
1048 abort:
1049 	UNLOCK_PFS();
1050 
1051 	return -EBUSY;
1052 
1053 }
1054 
1055 static int
1056 pfm_unreserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask)
1057 {
1058 	pfm_context_t *ctx;
1059 	unsigned long m;
1060 	unsigned int n, i;
1061 
1062 	ctx = task ? task->thread.pfm_context : NULL;
1063 
1064 	/*
1065 	 * validity checks on cpu_mask have been done upstream
1066 	 */
1067 	LOCK_PFS();
1068 
1069 	DBprintk(("[%d] sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu_mask=0x%lx\n",
1070 		task->pid,
1071 		pfm_sessions.pfs_sys_sessions,
1072 		pfm_sessions.pfs_task_sessions,
1073 		pfm_sessions.pfs_sys_use_dbregs,
1074 		is_syswide,
1075 		cpu_mask));
1076 
1077 
1078 	if (is_syswide) {
1079 		m = cpu_mask; n = 0;
1080 		for(i=0; m; i++, m>>=1) {
1081 			if ((m & 0x1) == 0UL) continue;
1082 			pfm_sessions.pfs_sys_session[i] = NULL;
1083 			n++;
1084 		}
1085 		/*
1086 		 * this would not work if more than one bit were set in cpu_mask
1087 		 */
1088 		if (ctx && ctx->ctx_fl_using_dbreg) {
1089 			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
1090 				printk(KERN_DEBUG "perfmon: invalid release for [%d] "
1091 				       "sys_use_dbregs=0\n", task->pid);
1092 			} else {
1093 				pfm_sessions.pfs_sys_use_dbregs--;
1094 			}
1095 		}
1096 		pfm_sessions.pfs_sys_sessions -= n;
1097 
1098 		DBprintk(("CPU%d sys_sessions=%u\n",
1099 			smp_processor_id(), pfm_sessions.pfs_sys_sessions));
1100 	} else {
1101 		pfm_sessions.pfs_task_sessions--;
1102 		DBprintk(("[%d] task_sessions=%u\n",
1103 			task->pid, pfm_sessions.pfs_task_sessions));
1104 	}
1105 
1106 	UNLOCK_PFS();
1107 
1108 	return 0;
1109 }
1110 
1111 static void
1112 pfm_send_notification_signal(unsigned long data)
1113 {
1114 	pfm_context_t *ctx = (pfm_context_t *)data;
1115 	struct siginfo si;
1116 	int ret;
1117 
1118 	DBprintk(("[%d] tasklet called\n", current->pid));
1119 
1120 	LOCK_CTX(ctx);
1121 
1122 	if (ctx->ctx_notify_task == NULL) {
1123 		printk(KERN_INFO "perfmon: tasklet lost notify_task\n");
1124 		goto nothing_to_do;
1125 	}
1126 	/* no leak */
1127 	memset(&si,0, sizeof(si));
1128 
1129 	si.si_addr        = NULL;
1130 	si.si_pid         = current->pid; /* irrelevant */
1131 	si.si_signo       = SIGPROF;
1132 	si.si_code        = PROF_OVFL; /* indicates a perfmon SIGPROF signal */
1133 	si.si_pfm_ovfl[0] = ctx->ctx_ovfl_regs[0];
1134 
1135 	if (ctx->ctx_notify_task != current) read_lock(&tasklist_lock);
1136 
1137 	DBprintk_ovfl(("[%d] tasklet sending notification to [%d]\n", current->pid, ctx->ctx_notify_task->pid));
1138 
1139 	ret = send_sig_info(SIGPROF, &si, ctx->ctx_notify_task);
1140 	if (ret != 0) printk(KERN_ERR "send_sig_info(process %d, SIGPROF)=%d\n", ctx->ctx_notify_task->pid, ret);
1141 
1142 	/*
1143 	 * now undo the protections in order
1144 	 */
1145 	if (ctx->ctx_notify_task != current) read_unlock(&tasklist_lock);
1146 nothing_to_do:
1147 	UNLOCK_CTX(ctx);
1148 }
1149 
1150 /*
1151  * XXX: do something better here
1152  */
1153 static int
1154 pfm_bad_permissions(struct task_struct *task)
1155 {
1156 	/* stolen from bad_signal() */
1157 	return (current->session != task->session)
1158 	    && (current->euid ^ task->suid) && (current->euid ^ task->uid)
1159 	    && (current->uid ^ task->suid) && (current->uid ^ task->uid);
1160 }
1161 
1162 static int
1163 pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx)
1164 {
1165 	unsigned long smpl_pmds = pfx->ctx_smpl_regs[0];
1166 	int ctx_flags;
1167 	int cpu;
1168 
1169 	/* valid signal */
1170 
1171 	/* cannot send to process 1, 0 means do not notify */
1172 	if (pfx->ctx_notify_pid == 1) {
1173 		DBprintk(("invalid notify_pid %d\n", pfx->ctx_notify_pid));
1174 		return -EINVAL;
1175 	}
1176 	ctx_flags = pfx->ctx_flags;
1177 
1178 	if ((ctx_flags & PFM_FL_INHERIT_MASK) == (PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)) {
1179 		DBprintk(("invalid inherit mask 0x%x\n",ctx_flags & PFM_FL_INHERIT_MASK));
1180 		return -EINVAL;
1181 	}
1182 
1183 	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
1184 		DBprintk(("cpu_mask=0x%lx\n", pfx->ctx_cpu_mask));
1185 		/*
1186 		 * cannot block in this mode
1187 		 */
1188 		if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
1189 			DBprintk(("cannot use blocking mode when in system wide monitoring\n"));
1190 			return -EINVAL;
1191 		}
1192 		/*
1193 		 * must only have one bit set in the CPU mask
1194 		 */
1195 		if (hweight64(pfx->ctx_cpu_mask) != 1UL) {
1196 			DBprintk(("invalid CPU mask specified\n"));
1197 			return -EINVAL;
1198 		}
1199 		/*
1200 		 * and it must be a valid CPU
1201 		 */
1202 		cpu = ffz(~pfx->ctx_cpu_mask);
1203 		if (cpu_online(cpu) == 0) {
1204 			DBprintk(("CPU%d is not online\n", cpu));
1205 			return -EINVAL;
1206 		}
1207 		/*
1208 		 * check for pre-existing pinning, if conflicting reject
1209 		 */
1210 		if (task->cpus_allowed != ~0UL && (task->cpus_allowed & (1UL<<cpu)) == 0) {
1211 			DBprintk(("[%d] pinned on 0x%lx, mask for CPU%d \n", task->pid,
1212 				task->cpus_allowed, cpu));
1213 			return -EINVAL;
1214 		}
1215 
1216 	} else {
1217 		/*
1218 		 * must provide a target for the signal in blocking mode even when
1219 		 * no counter is configured with PFM_FL_REG_OVFL_NOTIFY
1220 		 */
1221 		if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == 0) {
1222 			DBprintk(("must have notify_pid when blocking for [%d]\n", task->pid));
1223 			return -EINVAL;
1224 		}
1225 #if 0
1226 		if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == task->pid) {
1227 			DBprintk(("cannot notify self when blocking for [%d]\n", task->pid));
1228 			return -EINVAL;
1229 		}
1230 #endif
1231 	}
1232 	/* verify validity of smpl_regs */
1233 	if ((smpl_pmds & pmu_conf.impl_pmds[0]) != smpl_pmds) {
1234 		DBprintk(("invalid smpl_regs 0x%lx\n", smpl_pmds));
1235 		return -EINVAL;
1236 	}
1237 	/* probably more to add here */
1238 
1239 	return 0;
1240 }
1241 
1242 static int
1243 pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int count,
1244 		   struct pt_regs *regs)
1245 {
1246 	pfarg_context_t tmp;
1247 	void *uaddr = NULL;
1248 	int ret;
1249 	int ctx_flags;
1250 	pid_t notify_pid;
1251 
1252 	/* a context has already been defined */
1253 	if (ctx) return -EBUSY;
1254 
1255 	/*
1256 	 * not yet supported
1257 	 */
1258 	if (task != current) return -EINVAL;
1259 
1260 	if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
1261 
1262 	ret = pfx_is_sane(task, &tmp);
1263 	if (ret < 0) return ret;
1264 
1265 	ctx_flags = tmp.ctx_flags;
1266 
1267 	ret = pfm_reserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE, tmp.ctx_cpu_mask);
1268 	if (ret) goto abort;
1269 
1270 	ret = -ENOMEM;
1271 
1272 	ctx = pfm_context_alloc();
1273 	if (!ctx) goto error;
1274 
1275 	/* record the creator (important for inheritance) */
1276 	ctx->ctx_owner = current;
1277 
1278 	notify_pid = tmp.ctx_notify_pid;
1279 
1280 	spin_lock_init(&ctx->ctx_lock);
1281 
1282 	if (notify_pid == current->pid) {
1283 
1284 		ctx->ctx_notify_task = current;
1285 		task->thread.pfm_context = ctx;
1286 
1287 	} else if (notify_pid!=0) {
1288 		struct task_struct *notify_task;
1289 
1290 		read_lock(&tasklist_lock);
1291 
1292 		notify_task = find_task_by_pid(notify_pid);
1293 
1294 		if (notify_task) {
1295 
1296 			ret = -EPERM;
1297 
1298 			/*
1299 			 * check if we can send this task a signal
1300 			 */
1301 			if (pfm_bad_permissions(notify_task)) {
1302 				read_unlock(&tasklist_lock);
1303 				goto buffer_error;
1304 			}
1305 
1306 			/*
1307 		 	 * make visible
1308 		 	 * must be done inside critical section
1309 		 	 *
1310 		 	 * if the initialization does not go through, it is still
1311 		 	 * okay because the child will do the scan for nothing, which
1312 		 	 * won't hurt.
1313 		 	 */
1314 			task->thread.pfm_context = ctx;
1315 
1316 			/*
1317 			 * will cause task to check on exit for monitored
1318 			 * processes that would notify it. see release_thread()
1319 			 * Note: the scan MUST be done in release thread, once the
1320 			 * task has been detached from the tasklist otherwise you are
1321 			 * exposed to race conditions.
1322 			 */
1323 			atomic_add(1, &notify_task->thread.pfm_notifiers_check);
1324 
1325 			ctx->ctx_notify_task = notify_task;
1326 		}
1327 		read_unlock(&tasklist_lock);
1328 	}
1329 
1330 	/*
1331 	 * notification process does not exist
1332 	 */
1333 	if (notify_pid != 0 && ctx->ctx_notify_task == NULL) {
1334 		ret = -EINVAL;
1335 		goto buffer_error;
1336 	}
1337 
1338 	if (tmp.ctx_smpl_entries) {
1339 		DBprintk(("sampling entries=%lu\n",tmp.ctx_smpl_entries));
1340 
1341 		ret = pfm_smpl_buffer_alloc(ctx, tmp.ctx_smpl_regs,
1342 						 tmp.ctx_smpl_entries, &uaddr);
1343 		if (ret<0) goto buffer_error;
1344 
1345 		tmp.ctx_smpl_vaddr = uaddr;
1346 	}
1347 	/* initialization of context's flags */
1348 	ctx->ctx_fl_inherit   = ctx_flags & PFM_FL_INHERIT_MASK;
1349 	ctx->ctx_fl_block     = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
1350 	ctx->ctx_fl_system    = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
1351 	ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
1352 	ctx->ctx_fl_unsecure  = (ctx_flags & PFM_FL_UNSECURE) ? 1: 0;
1353 	ctx->ctx_fl_frozen    = 0;
1354 	/*
1355 	 * setting this flag to 0 here means that both the creator and the task to which the
1356 	 * context is attached are granted access. Given that a context can only
1357 	 * be created for the calling process, this in effect only allows the creator
1358 	 * to access the context. See pfm_protect() for more.
1359 	 */
1360 	ctx->ctx_fl_protected = 0;
1361 
1362 	/* for system wide mode only (only 1 bit set) */
1363 	ctx->ctx_cpu = ffz(~tmp.ctx_cpu_mask);
1364 
1365 	/* SMP only, means no CPU */
1366 	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
1367 	SET_LAST_CPU(ctx, -1);
1368 
1369 	sema_init(&ctx->ctx_restart_sem, 0); /* init this semaphore to locked */
1370 
1371 	/*
1372 	 * initialize tasklet for signal notifications
1373 	 *
1374 	 * ALL signal-based (or any notification using data structures
1375 	 * external to perfmon) MUST use tasklets to avoid lock contentions
1376 	 * when a signal has to be sent for overflow interrupt handler.
1377 	 */
1378 	tasklet_init(&ctx->ctx_tasklet, pfm_send_notification_signal, (unsigned long)ctx);
1379 
1380 	if (__copy_to_user(req, &tmp, sizeof(tmp))) {
1381 		ret = -EFAULT;
1382 		goto buffer_error;
1383 	}
1384 
1385 	DBprintk(("context=%p, pid=%d notify_task=%p\n",
1386 			(void *)ctx, task->pid, ctx->ctx_notify_task));
1387 
1388 	DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d excl_idle=%d unsecure=%d\n",
1389 			(void *)ctx, task->pid, ctx_flags, ctx->ctx_fl_inherit,
1390 			ctx->ctx_fl_block, ctx->ctx_fl_system,
1391 			ctx->ctx_fl_excl_idle,
1392 			ctx->ctx_fl_unsecure));
1393 
1394 	/*
1395 	 * when no notification is required, we can make this visible at the last moment
1396 	 */
1397 	if (notify_pid == 0) task->thread.pfm_context = ctx;
1398 	/*
1399 	 * pin task to CPU and force reschedule on exit to ensure
1400 	 * that when back to user level the task runs on the designated
1401 	 * CPU.
1402 	 */
1403 	if (ctx->ctx_fl_system) {
1404 		ctx->ctx_saved_cpus_allowed = task->cpus_allowed;
1405 		task->cpus_allowed = tmp.ctx_cpu_mask;
1406 		task->need_resched = 1;
1407 		DBprintk(("[%d] rescheduled allowed=0x%lx\n", task->pid, task->cpus_allowed));
1408 	}
1409 
1410 	return 0;
1411 
1412 buffer_error:
1413 	pfm_context_free(ctx);
1414 error:
1415 	pfm_unreserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE , tmp.ctx_cpu_mask);
1416 abort:
1417 	/* make sure we don't leave anything behind */
1418 	task->thread.pfm_context = NULL;
1419 
1420 	return ret;
1421 }
1422 
1423 static inline unsigned long
1424 pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
1425 {
1426 	unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;
1427 	unsigned long new_seed, old_seed = reg->seed, mask = reg->mask;
1428 	extern unsigned long carta_random32 (unsigned long seed);
1429 
1430 	if (reg->flags & PFM_REGFL_RANDOM) {
1431 		new_seed = carta_random32(old_seed);
1432 		val -= (old_seed & mask);	/* counter values are negative numbers! */
1433 		if ((mask >> 32) != 0)
1434 			/* construct a full 64-bit random value: */
1435 			new_seed |= carta_random32(old_seed >> 32) << 32;
1436 		reg->seed = new_seed;
1437 	}
1438 	reg->lval = val;
1439 	return val;
1440 }
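/*
 * With PFM_REGFL_RANDOM the reset value is decreased by a pseudo-random offset
 * (old_seed & mask) generated with carta_random32(), so successive sampling
 * periods vary instead of being perfectly periodic.
 */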
1441 
1442 static void
1443 pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
1444 {
1445 	unsigned long mask = ovfl_regs[0];
1446 	unsigned long reset_others = 0UL;
1447 	unsigned long val;
1448 	int i, is_long_reset = (flag == PFM_PMD_LONG_RESET);
1449 
1450 	/*
1451 	 * now restore reset value on sampling overflowed counters
1452 	 */
1453 	mask >>= PMU_FIRST_COUNTER;
1454 	for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
1455 		if (mask & 0x1) {
1456 			val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset);
1457 			reset_others |= ctx->ctx_soft_pmds[i].reset_pmds[0];
1458 
1459 			DBprintk_ovfl(("[%d] %s reset soft_pmd[%d]=%lx\n", current->pid,
1460 				  is_long_reset ? "long" : "short", i, val));
1461 
1462 			/* upper part is ignored on rval */
1463 			pfm_write_soft_counter(ctx, i, val);
1464 		}
1465 	}
1466 
1467 	/*
1468 	 * Now take care of resetting the other registers
1469 	 */
1470 	for(i = 0; reset_others; i++, reset_others >>= 1) {
1471 
1472 		if ((reset_others & 0x1) == 0) continue;
1473 
1474 		val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset);
1475 
1476 		if (PMD_IS_COUNTING(i)) {
1477 			pfm_write_soft_counter(ctx, i, val);
1478 		} else {
1479 			ia64_set_pmd(i, val);
1480 		}
1481 		DBprintk_ovfl(("[%d] %s reset_others pmd[%d]=%lx\n", current->pid,
1482 			  is_long_reset ? "long" : "short", i, val));
1483 	}
1484 	ia64_srlz_d();
1485 }
1486 
1487 static int
1488 pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
1489 {
1490 	struct thread_struct *th = &task->thread;
1491 	pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
1492 	unsigned long value, reset_pmds;
1493 	unsigned int cnum, reg_flags, flags;
1494 	int is_monitor, is_counting;
1495 	int i, ret = -EINVAL;
1496 #define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))
1497 
1498 	/* we don't quite support this right now */
1499 	if (task != current) return -EINVAL;
1500 
1501 	if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
1502 
1503 
1504 	/* XXX: ctx locking may be required here */
1505 
1506 	for (i = 0; i < count; i++, req++) {
1507 
1508 		if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
1509 
1510 		cnum       = tmp.reg_num;
1511 		reg_flags  = tmp.reg_flags;
1512 		value      = tmp.reg_value;
1513 		reset_pmds = tmp.reg_reset_pmds[0];
1514 		flags      = 0;
1515 
1516 		is_counting = PMC_IS_COUNTING(cnum);
1517 		is_monitor  = PMC_IS_MONITOR(cnum);
1518 
1519 		/*
1520 		 * we reject all non implemented PMC as well
1521 		 * as attempts to modify PMC[0-3] which are used
1522 		 * as status registers by the PMU
1523 		 */
1524 		if (!PMC_IS_IMPL(cnum) || cnum < 4) {
1525 			DBprintk(("pmc[%u] is unimplemented or invalid\n", cnum));
1526 			goto error;
1527 		}
1528 		/*
1529 		 * If the PMC is a monitor, then if the value is not the default:
1530 		 * 	- system-wide session: PMCx.pm=1 (privileged monitor)
1531 		 * 	- per-task           : PMCx.pm=0 (user monitor)
1532 		 */
1533 		if ((is_monitor || is_counting) && value != PMC_DFL_VAL(cnum) && PFM_CHECK_PMC_PM(ctx, cnum, value)) {
1534 			DBprintk(("pmc%u pmc_pm=%ld fl_system=%d\n",
1535 				cnum,
1536 				PMC_PM(cnum, value),
1537 				ctx->ctx_fl_system));
1538 			goto error;
1539 		}
1540 
1541 		if (is_counting) {
1542 			pfm_monitor_t *p = (pfm_monitor_t *)&value;
1543 			/*
1544 		 	 * enforce generation of overflow interrupt. Necessary on all
1545 		 	 * CPUs.
1546 		 	 */
1547 			p->pmc_oi = 1;
1548 
1549 			if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
1550 				/*
1551 			 	 * must have a target for the signal
1552 			 	 */
1553 				if (ctx->ctx_notify_task == NULL) {
1554 					DBprintk(("cannot set ovfl_notify: no notify_task\n"));
1555 					goto error;
1556 				}
1557 				flags |= PFM_REGFL_OVFL_NOTIFY;
1558 			}
1559 
1560 			if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
1561 
1562 			/* verify validity of reset_pmds */
1563 			if ((reset_pmds & pmu_conf.impl_pmds[0]) != reset_pmds) {
1564 				DBprintk(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
1565 				goto error;
1566 			}
1567 		} else if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
1568 				DBprintk(("cannot set ovfl_notify or random on pmc%u\n", cnum));
1569 				goto error;
1570 		}
1571 
1572 		/*
1573 		 * execute write checker, if any
1574 		 */
1575 		if (PMC_WR_FUNC(cnum)) {
1576 			ret = PMC_WR_FUNC(cnum)(task, cnum, &value, regs);
1577 			if (ret) goto error;
1578 			ret = -EINVAL;
1579 		}
1580 
1581 		/*
1582 		 * no error on this register
1583 		 */
1584 		PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
1585 
1586 		/*
1587 		 * update register return value, abort all if problem during copy.
1588 		 * we only modify the reg_flags field. no check mode is fine because
1589 		 * access has been verified upfront in sys_perfmonctl().
1590 		 *
1591 		 * If this fails, then the software state is not modified
1592 		 */
1593 		if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT;
1594 
1595 		/*
1596 		 * Now we commit the changes to the software state
1597 		 */
1598 
1599 		/*
1600 		 * full flag update each time a register is programmed
1601 		 */
1602 		ctx->ctx_soft_pmds[cnum].flags = flags;
1603 
1604 		if (is_counting) {
1605 			ctx->ctx_soft_pmds[cnum].reset_pmds[0] = reset_pmds;
1606 
1607 			/* mark all PMDS to be accessed as used */
1608 			CTX_USED_PMD(ctx, reset_pmds);
1609 		}
1610 
1611 		/*
1612 		 * Needed in case the user does not initialize the equivalent
1613 		 * PMD. Clearing is done in reset_pmu() so there is no possible
1614 		 * leak here.
1615 		 */
1616 		CTX_USED_PMD(ctx, pmu_conf.pmc_desc[cnum].dep_pmd[0]);
1617 
1618 		/*
1619 		 * keep a copy of the pmc, used for register reload
1620 		 */
1621 		th->pmc[cnum] = value;
1622 
1623 		ia64_set_pmc(cnum, value);
1624 
1625 		DBprintk(("[%d] pmc[%u]=0x%lx flags=0x%x used_pmds=0x%lx\n",
1626 			  task->pid, cnum, value,
1627 			  ctx->ctx_soft_pmds[cnum].flags,
1628 			  ctx->ctx_used_pmds[0]));
1629 
1630 	}
1631 
1632 	return 0;
1633 
1634 error:
1635 	PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
1636 
1637 	if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT;
1638 
1639 	DBprintk(("[%d] pmc[%u]=0x%lx error %d\n", task->pid, cnum, value, ret));
1640 
1641 	return ret;
1642 }
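
/*
 * Illustrative sketch (added comment, not kernel code): a user-level tool
 * programming one counting PMC through the routine above would typically
 * submit a pfarg_reg_t via perfmonctl(). Command and field names are the
 * ones expected in perfmon.h of this era (reg_reset_pmds in particular is
 * an assumption); the event encoding is purely hypothetical and normally
 * comes from PMU-specific documentation:
 *
 *	pfarg_reg_t pc;
 *
 *	memset(&pc, 0, sizeof(pc));
 *	pc.reg_num           = 4;			// first generic counter pair
 *	pc.reg_value         = event_encoding;		// hypothetical event selection
 *	pc.reg_flags         = PFM_REGFL_OVFL_NOTIFY;	// ask for overflow notification
 *	pc.reg_reset_pmds[0] = 1UL << 4;		// reset pmd4 after an overflow
 *	perfmonctl(getpid(), PFM_WRITE_PMCS, &pc, 1);
 */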
1643 
1644 static int
1645 pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
1646 {
1647 	pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
1648 	unsigned long value, hw_value;
1649 	unsigned int cnum;
1650 	int i;
1651 	int ret = -EINVAL;
1652 
1653 	/* we don't quite support this right now */
1654 	if (task != current) return -EINVAL;
1655 
1656 	/*
1657 	 * Cannot do anything before PMU is enabled
1658 	 */
1659 	if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
1660 
1661 	/* XXX: ctx locking may be required here */
1662 
1663 
1664 	for (i = 0; i < count; i++, req++) {
1665 
1666 		if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
1667 
1668 		cnum  = tmp.reg_num;
1669 		value = tmp.reg_value;
1670 
1671 		if (!PMD_IS_IMPL(cnum)) {
1672 			DBprintk(("pmd[%u] is unimplemented or invalid\n", cnum));
1673 			goto abort_mission;
1674 		}
1675 
1676 		/*
1677 		 * execute write checker, if any
1678 		 */
1679 		if (PMD_WR_FUNC(cnum)) {
1680 			unsigned long v = value;
1681 			ret = PMD_WR_FUNC(cnum)(task, cnum, &v, regs);
1682 			if (ret) goto abort_mission;
1683 			value = v;
1684 			ret = -EINVAL;
1685 		}
1686 		hw_value = value;
1687 		/*
1688 		 * no error on this register
1689 		 */
1690 		PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
1691 
1692 		if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT;
1693 
1694 		/*
1695 		 * now commit changes to software state
1696 		 */
1697 
1698 		/* update virtualized (64bits) counter */
1699 		if (PMD_IS_COUNTING(cnum)) {
1700 			ctx->ctx_soft_pmds[cnum].lval = value;
1701 			ctx->ctx_soft_pmds[cnum].val  = value & ~pmu_conf.ovfl_val;
1702 
1703 			hw_value = value & pmu_conf.ovfl_val;
1704 
1705 			ctx->ctx_soft_pmds[cnum].long_reset  = tmp.reg_long_reset;
1706 			ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset;
1707 
1708 			ctx->ctx_soft_pmds[cnum].seed = tmp.reg_random_seed;
1709 			ctx->ctx_soft_pmds[cnum].mask = tmp.reg_random_mask;
1710 		}
1711 
1712 		/* keep track of what we use */
1713 		CTX_USED_PMD(ctx, pmu_conf.pmd_desc[(cnum)].dep_pmd[0]);
1714 
1715 		/* mark this register as used as well */
1716 		CTX_USED_PMD(ctx, RDEP(cnum));
1717 
1718 		/* writes to the unimplemented part are ignored, so this is safe */
1719 		ia64_set_pmd(cnum, hw_value);
1720 
1721 		/* to go away */
1722 		ia64_srlz_d();
1723 
1724 		DBprintk(("[%d] pmd[%u]: value=0x%lx hw_value=0x%lx soft_pmd=0x%lx  short_reset=0x%lx "
1725 			  "long_reset=0x%lx hw_pmd=%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx psr=%d\n",
1726 				task->pid, cnum,
1727 				value, hw_value,
1728 				ctx->ctx_soft_pmds[cnum].val,
1729 				ctx->ctx_soft_pmds[cnum].short_reset,
1730 				ctx->ctx_soft_pmds[cnum].long_reset,
1731 				ia64_get_pmd(cnum) & pmu_conf.ovfl_val,
1732 				PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
1733 				ctx->ctx_used_pmds[0],
1734 				ctx->ctx_soft_pmds[cnum].reset_pmds[0], ia64_psr(regs)->sp));
1735 	}
1736 
1737 	return 0;
1738 
1739 abort_mission:
1740 	/*
1741 	 * for now, we have only one possibility for error
1742 	 */
1743 	PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
1744 
1745 	/*
1746 	 * we change the return value to EFAULT in case we cannot write register return code.
1747 	 * The caller must first correct this error, then a resubmission of the request will
1748 	 * eventually yield the EINVAL.
1749 	 */
1750 	if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT;
1751 
1752 	DBprintk(("[%d] pmc[%u]=0x%lx ret %d\n", task->pid, cnum, value, ret));
1753 
1754 	return ret;
1755 }
1756 
1757 static int
1758 pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
1759 {
1760 	struct thread_struct *th = &task->thread;
1761 	unsigned long val, lval;
1762 	pfarg_reg_t *req = (pfarg_reg_t *)arg;
1763 	unsigned int cnum, reg_flags = 0;
1764 	int i, ret = 0;
1765 #if __GNUC__ < 3
1766 	int foo;
1767 #endif
1768 
1769 	if (!CTX_IS_ENABLED(ctx)) {
1770 		DBprintk(("context for [%d] is disabled\n", task->pid));
1771 		return -EINVAL;
1772 	}
1773 
1774 	/*
1775 	 * XXX: MUST MAKE SURE WE DON'T HAVE ANY PENDING OVERFLOW BEFORE READING
1776 	 * This is required when monitoring has been stopped by the user or the kernel.
1777 	 * If it is still going on, then that's fine because we are not guaranteed
1778 	 * to return an accurate value in this case.
1779 	 */
1780 
1781 	/* XXX: ctx locking may be required here */
1782 
1783 	/*
1784 	 * should we need to access the PMU, serialization is needed
1785 	 */
1786 	ia64_srlz_d();
1787 
1788 	for (i = 0; i < count; i++, req++) {
1789 
1790 #if __GNUC__ < 3
1791 		foo = __get_user(cnum, &req->reg_num);
1792 		if (foo) return -EFAULT;
1793 		foo = __get_user(reg_flags, &req->reg_flags);
1794 		if (foo) return -EFAULT;
1795 #else
1796 		if (__get_user(cnum, &req->reg_num)) return -EFAULT;
1797 		if (__get_user(reg_flags, &req->reg_flags)) return -EFAULT;
1798 #endif
1799 		lval = 0UL;
1800 
1801 		if (!PMD_IS_IMPL(cnum)) goto abort_mission;
1802 		/*
1803 		 * we can only read registers that we use. That includes
1804 		 * the ones we explicitly initialize AND the ones we want included
1805 		 * in the sampling buffer (smpl_regs).
1806 		 *
1807 		 * Having this restriction allows optimization in the ctxsw routine
1808 		 * without compromising security (leaks)
1809 		 */
1810 		if (!CTX_IS_USED_PMD(ctx, cnum)) goto abort_mission;
1811 
1812 		/*
1813 		 * we can access the registers directly only when task
1814 		 * is the OWNER of the local PMU. In SMP, this can
1815 		 * happen only when task == current. In addition
1816 		 * this can happen when task != current but
1817 		 * only in UP mode.
1818 		 */
1819 		if (task == PMU_OWNER()) {
1820 			val = ia64_get_pmd(cnum);
1821 			DBprintk(("reading pmd[%u]=0x%lx from hw\n", cnum, val));
1822 		} else {
1823 			/* context has been saved */
1824 			val = th->pmd[cnum];
1825 		}
1826 
1827 		if (PMD_IS_COUNTING(cnum)) {
1828 			/*
1829 			 * XXX: need to check for overflow
1830 			 */
1831 			val &= pmu_conf.ovfl_val;
1832 			val += ctx->ctx_soft_pmds[cnum].val;
1833 
1834 			lval = ctx->ctx_soft_pmds[cnum].lval;
1835 		}
1836 
1837 		/*
1838 		 * execute read checker, if any
1839 		 */
1840 		if (PMD_RD_FUNC(cnum)) {
1841 			unsigned long v = val;
1842 			ret = PMD_RD_FUNC(cnum)(task, cnum, &v, regs);
1843 			val = v;
1844 		}
1845 
1846 		PFM_REG_RETFLAG_SET(reg_flags, ret);
1847 
1848 		DBprintk(("read pmd[%u] ret=%d value=0x%lx pmc=0x%lx\n",
1849 					cnum, ret, val, ia64_get_pmc(cnum)));
1850 
1851 		/*
1852 		 * update register return value, abort all if problem during copy.
1853 		 * we only modify the reg_flags field. Using the no-check __put_user()
1854 		 * is fine because access has been verified upfront in sys_perfmonctl().
1855 		 */
1856 		if (__put_user(cnum, &req->reg_num)) return -EFAULT;
1857 		if (__put_user(val, &req->reg_value)) return -EFAULT;
1858 		if (__put_user(reg_flags, &req->reg_flags)) return -EFAULT;
1859 		if (__put_user(lval, &req->reg_last_reset_value)) return -EFAULT;
1860 	}
1861 
1862 	return 0;
1863 
1864 abort_mission:
1865 	PFM_REG_RETFLAG_SET(reg_flags, PFM_REG_RETFL_EINVAL);
1866 	/*
1867 	 * XXX: if this fails, we stick with the original failure, flag not updated!
1868 	 */
1869 	__put_user(reg_flags, &req->reg_flags);
1870 
1871 	return -EINVAL;
1872 }
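
/*
 * Note (added comment): a condensed sketch of the 64-bit counter
 * virtualization implemented by pfm_write_pmds()/pfm_read_pmds() above,
 * where pmu_conf.ovfl_val is the mask of bits implemented by the hardware
 * counter:
 *
 *	// write: split the 64-bit user value
 *	soft_pmd.val = value & ~pmu_conf.ovfl_val;	// software (upper) bits
 *	hw_value     = value &  pmu_conf.ovfl_val;	// bits loaded into the PMD
 *
 *	// read: recombine hardware and software parts
 *	value = (ia64_get_pmd(cnum) & pmu_conf.ovfl_val) + soft_pmd.val;
 *
 * The software part is advanced by the overflow handler each time the
 * hardware counter wraps, so reads observe a full 64-bit count.
 */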
1873 
1874 #ifdef PFM_PMU_USES_DBR
1875 /*
1876  * Only call this function when a process is trying to
1877  * write the debug registers (reading is always allowed)
1878  */
1879 int
1880 pfm_use_debug_registers(struct task_struct *task)
1881 {
1882 	pfm_context_t *ctx = task->thread.pfm_context;
1883 	int ret = 0;
1884 
1885 	DBprintk(("called for [%d]\n", task->pid));
1886 
1887 	/*
1888 	 * do it only once
1889 	 */
1890 	if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;
1891 
1892 	/*
1893 	 * Even on SMP, we do not need to use an atomic here because
1894 	 * the only way in is via ptrace() and this is possible only when the
1895 	 * process is stopped. Even in the case where the ctxsw out is not totally
1896 	 * completed by the time we come here, there is no way the 'stopped' process
1897 	 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
1898 	 * So this is always safe.
1899 	 */
1900 	if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;
1901 
1902 	LOCK_PFS();
1903 
1904 	/*
1905 	 * We cannot allow setting breakpoints when system wide monitoring
1906 	 * sessions are using the debug registers.
1907 	 */
1908 	if (pfm_sessions.pfs_sys_use_dbregs> 0)
1909 		ret = -1;
1910 	else
1911 		pfm_sessions.pfs_ptrace_use_dbregs++;
1912 
1913 	DBprintk(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
1914 		  pfm_sessions.pfs_ptrace_use_dbregs,
1915 		  pfm_sessions.pfs_sys_use_dbregs,
1916 		  task->pid, ret));
1917 
1918 	UNLOCK_PFS();
1919 
1920 	return ret;
1921 }
1922 
1923 /*
1924  * This function is called for every task that exits with the
1925  * IA64_THREAD_DBG_VALID set. This indicates a task which was
1926  * able to use the debug registers for debugging purposes via
1927  * ptrace(). Therefore we know it was not using them for
1928  * performance monitoring, so we only decrement the number
1929  * of "ptraced" debug register users to keep the count up to date
1930  */
1931 int
1932 pfm_release_debug_registers(struct task_struct *task)
1933 {
1934 	int ret;
1935 
1936 	LOCK_PFS();
1937 	if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
1938 		printk(KERN_DEBUG "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n",
1939 		       task->pid);
1940 		ret = -1;
1941 	}  else {
1942 		pfm_sessions.pfs_ptrace_use_dbregs--;
1943 		ret = 0;
1944 	}
1945 	UNLOCK_PFS();
1946 
1947 	return ret;
1948 }
1949 #else /* !PFM_PMU_USES_DBR */
1950 /*
1951  * in case the PMU does not use the debug registers, these two functions are nops.
1952  * The first function is called from arch/ia64/kernel/ptrace.c.
1953  * The second function is called from arch/ia64/kernel/process.c.
1954  */
1955 int
1956 pfm_use_debug_registers(struct task_struct *task)
1957 {
1958 	return 0;
1959 }
1960 
1961 int
1962 pfm_release_debug_registers(struct task_struct *task)
1963 {
1964 	return 0;
1965 }
1966 #endif /* PFM_PMU_USES_DBR */
1967 
1968 static int
1969 pfm_restart(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
1970 	 struct pt_regs *regs)
1971 {
1972 	void *sem = &ctx->ctx_restart_sem;
1973 
1974 	/*
1975 	 * Cannot do anything before PMU is enabled
1976 	 */
1977 	if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
1978 
1979 	if (task == current) {
1980 		DBprintk(("restarting self %d frozen=%d ovfl_regs=0x%lx\n",
1981 			task->pid,
1982 			ctx->ctx_fl_frozen,
1983 			ctx->ctx_ovfl_regs[0]));
1984 
1985 		pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
1986 
1987 		ctx->ctx_ovfl_regs[0] = 0UL;
1988 
1989 		/*
1990 		 * We ignore block/don't block because we never block
1991 		 * for a self-monitoring process.
1992 		 */
1993 		ctx->ctx_fl_frozen = 0;
1994 
1995 		if (CTX_HAS_SMPL(ctx)) {
1996 			ctx->ctx_psb->psb_hdr->hdr_count = 0;
1997 			ctx->ctx_psb->psb_index = 0;
1998 		}
1999 
2000 		/* simply unfreeze */
2001 		pfm_unfreeze_pmu();
2002 
2003 		return 0;
2004 	}
2005 	/* restart on another task */
2006 
2007 	/*
2008 	 * if blocking, then post the semaphore.
2009 	 * if non-blocking, then we ensure that the task will go into
2010 	 * pfm_overflow_must_block() before returning to user mode.
2011 	 * We cannot explicitly reset another task, it MUST always
2012 	 * be done by the task itself. This works for system wide because
2013 	 * the tool that is controlling the session is doing "self-monitoring".
2014 	 *
2015 	 * XXX: what if the task never goes back to user?
2016 	 *
2017 	 */
2018 	if (CTX_OVFL_NOBLOCK(ctx) == 0) {
2019 		DBprintk(("unblocking %d \n", task->pid));
2020 		up(sem);
2021 	} else {
2022 		task->thread.pfm_ovfl_block_reset = 1;
2023 	}
2024 #if 0
2025 	/*
2026 	 * in case of non-blocking mode, it's just a matter of
2027 	 * resetting the sampling buffer (if any) index. The PMU
2028 	 * is already active.
2029 	 */
2030 
2031 	/*
2032 	 * must reset the header count first
2033 	 */
2034 	if (CTX_HAS_SMPL(ctx)) {
2035 		DBprintk(("resetting sampling indexes for %d \n", task->pid));
2036 		ctx->ctx_psb->psb_hdr->hdr_count = 0;
2037 		ctx->ctx_psb->psb_index = 0;
2038 	}
2039 #endif
2040 	return 0;
2041 }
2042 
2043 static int
2044 pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2045 	 struct pt_regs *regs)
2046 {
2047 	/* we don't quite support this right now */
2048 	if (task != current) return -EINVAL;
2049 
2050 	/*
2051 	 * Cannot do anything before PMU is enabled
2052 	 */
2053 	if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2054 
2055 	DBprintk(("[%d] fl_system=%d owner=%p current=%p\n",
2056 				current->pid,
2057 				ctx->ctx_fl_system, PMU_OWNER(),
2058 				current));
2059 
2060 	/* simply stop monitoring but not the PMU */
2061 	if (ctx->ctx_fl_system) {
2062 
2063 		/* disable dcr pp */
2064 		ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
2065 
2066 		/* stop monitoring */
2067 		pfm_clear_psr_pp();
2068 		ia64_srlz_i();
2069 
2070 		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
2071 
2072 		ia64_psr(regs)->pp = 0;
2073 
2074 	} else {
2075 
2076 		/* stop monitoring */
2077 		pfm_clear_psr_up();
2078 		ia64_srlz_i();
2079 
2080 		/*
2081 		 * clear user level psr.up
2082 		 */
2083 		ia64_psr(regs)->up = 0;
2084 	}
2085 	return 0;
2086 }
2087 
2088 static int
2089 pfm_disable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2090 	   struct pt_regs *regs)
2091 {
2092 	/* we don't quite support this right now */
2093 	if (task != current) return -EINVAL;
2094 
2095 	if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2096 
2097 	/*
2098 	 * stop monitoring, freeze PMU, and save state in context
2099 	 * this call will clear IA64_THREAD_PM_VALID for per-task sessions.
2100 	 */
2101 	pfm_flush_regs(task);
2102 
2103 	if (ctx->ctx_fl_system) {
2104 		ia64_psr(regs)->pp = 0;
2105 	} else {
2106 		ia64_psr(regs)->up = 0;
2107 	}
2108 	/*
2109 	 * goes back to default behavior: no user level control
2110 	 * no need to change live psr.sp because useless at the kernel level
2111 	 */
2112 	ia64_psr(regs)->sp = 1;
2113 
2114 	DBprintk(("enabling psr.sp for [%d]\n", current->pid));
2115 
2116 	ctx->ctx_flags.state = PFM_CTX_DISABLED;
2117 
2118 	return 0;
2119 }
2120 
2121 static int
2122 pfm_context_destroy(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2123 	 struct pt_regs *regs)
2124 {
2125 	/* we don't quite support this right now */
2126 	if (task != current) return -EINVAL;
2127 
2128 	/*
2129 	 * if context was never enabled, then there is not much
2130 	 * to do
2131 	 */
2132 	if (!CTX_IS_ENABLED(ctx)) goto skipped_stop;
2133 
2134 	/*
2135 	 * Disable context: stop monitoring, flush regs to software state (useless here),
2136 	 * and freeze PMU
2137 	 *
2138 	 * The IA64_THREAD_PM_VALID is cleared by pfm_flush_regs() called from pfm_disable()
2139 	 */
2140 	pfm_disable(task, ctx, arg, count, regs);
2141 
2142 	if (ctx->ctx_fl_system) {
2143 		ia64_psr(regs)->pp = 0;
2144 	} else {
2145 		ia64_psr(regs)->up = 0;
2146 	}
2147 
2148 skipped_stop:
2149 	/*
2150 	 * remove sampling buffer mapping, if any
2151 	 */
2152 	if (ctx->ctx_smpl_vaddr) {
2153 		pfm_remove_smpl_mapping(task);
2154 		ctx->ctx_smpl_vaddr = 0UL;
2155 	}
2156 	/* now free context and related state */
2157 	pfm_context_exit(task);
2158 
2159 	return 0;
2160 }
2161 
2162 /*
2163  * does nothing at the moment
2164  */
2165 static int
2166 pfm_context_unprotect(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2167 	 struct pt_regs *regs)
2168 {
2169 	return 0;
2170 }
2171 
2172 static int
2173 pfm_protect_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2174 	 struct pt_regs *regs)
2175 {
2176 	/*
2177 	 * from now on, only the creator of the context has access to it
2178 	 */
2179 	ctx->ctx_fl_protected = 1;
2180 
2181 	/*
2182 	 * reinforce secure monitoring: cannot toggle psr.up
2183 	 */
2184 	if (ctx->ctx_fl_unsecure == 0) ia64_psr(regs)->sp = 1;
2185 
2186 	DBprintk(("[%d] protected psr.sp=%d\n", task->pid, ia64_psr(regs)->sp));
2187 
2188 	return 0;
2189 }
2190 
2191 static int
2192 pfm_debug(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2193 	 struct pt_regs *regs)
2194 {
2195 	unsigned int mode = *(unsigned int *)arg;
2196 
2197 	pfm_sysctl.debug = mode == 0 ? 0 : 1;
2198 
2199 	printk(KERN_INFO "perfmon debugging %s\n", pfm_sysctl.debug ? "on" : "off");
2200 
2201 	return 0;
2202 }
2203 
2204 #ifdef PFM_PMU_USES_DBR
2205 
2206 typedef struct {
2207 	unsigned long ibr_mask:56;
2208 	unsigned long ibr_plm:4;
2209 	unsigned long ibr_ig:3;
2210 	unsigned long ibr_x:1;
2211 } ibr_mask_reg_t;
2212 
2213 typedef struct {
2214 	unsigned long dbr_mask:56;
2215 	unsigned long dbr_plm:4;
2216 	unsigned long dbr_ig:2;
2217 	unsigned long dbr_w:1;
2218 	unsigned long dbr_r:1;
2219 } dbr_mask_reg_t;
2220 
2221 typedef union {
2222 	unsigned long  val;
2223 	ibr_mask_reg_t ibr;
2224 	dbr_mask_reg_t dbr;
2225 } dbreg_t;
2226 
2227 
2228 static int
2229 pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, struct pt_regs *regs)
2230 {
2231 	struct thread_struct *thread = &task->thread;
2232 	pfm_context_t *ctx = task->thread.pfm_context;
2233 	pfarg_dbreg_t tmp, *req = (pfarg_dbreg_t *)arg;
2234 	dbreg_t dbreg;
2235 	unsigned int rnum;
2236 	int first_time;
2237 	int i, ret = 0;
2238 
2239 	/*
2240 	 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
2241 	 * ensuring that no real breakpoint can be installed via this call.
2242 	 */
2243 
2244 	first_time = ctx->ctx_fl_using_dbreg == 0;
2245 
2246 	/*
2247 	 * check for debug registers in system wide mode
2248 	 *
2249 	 */
2250 	LOCK_PFS();
2251 	if (ctx->ctx_fl_system && first_time) {
2252 		if (pfm_sessions.pfs_ptrace_use_dbregs)
2253 			ret = -EBUSY;
2254 		else
2255 			pfm_sessions.pfs_sys_use_dbregs++;
2256 	}
2257 	UNLOCK_PFS();
2258 
2259 	if (ret != 0) return ret;
2260 
2261 	if (ctx->ctx_fl_system) {
2262 		/* we mark ourselves as owner  of the debug registers */
2263 		ctx->ctx_fl_using_dbreg = 1;
2264 		DBprintk(("system-wide setting fl_using_dbreg for [%d]\n", task->pid));
2265 	} else if (first_time) {
2266 			ret = -EBUSY;
2267 			if ((thread->flags & IA64_THREAD_DBG_VALID) != 0) {
2268 				DBprintk(("debug registers already in use for [%d]\n", task->pid));
2269 				goto abort_mission;
2270 			}
2271 			/* we mark ourselves as owner  of the debug registers */
2272 			ctx->ctx_fl_using_dbreg = 1;
2273 
2274 			DBprintk(("setting fl_using_dbreg for [%d]\n", task->pid));
2275 			/*
2276 			 * Given debug registers cannot be used for both debugging
2277 			 * and performance monitoring at the same time, we reuse
2278 			 * the storage area to save and restore the registers on ctxsw.
2279 			 */
2280 			memset(task->thread.dbr, 0, sizeof(task->thread.dbr));
2281 			memset(task->thread.ibr, 0, sizeof(task->thread.ibr));
2282 	}
2283 
2284 	if (first_time) {
2285 		DBprintk(("[%d] clearing ibrs,dbrs\n", task->pid));
2286 		/*
2287 	 	 * clear hardware registers to make sure we don't
2288 	 	 * pick up stale state.
2289 		 *
2290 		 * for a system wide session, we do not use
2291 		 * thread.dbr, thread.ibr because this process
2292 		 * never leaves the current CPU and the state
2293 		 * is shared by all processes running on it
2294 	 	 */
2295 		for (i=0; i < pmu_conf.num_ibrs; i++) {
2296 			ia64_set_ibr(i, 0UL);
2297 		}
2298 		ia64_srlz_i();
2299 		for (i=0; i < pmu_conf.num_dbrs; i++) {
2300 			ia64_set_dbr(i, 0UL);
2301 		}
2302 		ia64_srlz_d();
2303 	}
2304 
2305 	ret = -EFAULT;
2306 
2307 	/*
2308 	 * Now install the values into the registers
2309 	 */
2310 	for (i = 0; i < count; i++, req++) {
2311 
2312 		if (__copy_from_user(&tmp, req, sizeof(tmp))) goto abort_mission;
2313 
2314 		rnum      = tmp.dbreg_num;
2315 		dbreg.val = tmp.dbreg_value;
2316 
2317 		ret = -EINVAL;
2318 
2319 		if ((mode == 0 && !IBR_IS_IMPL(rnum)) || ((mode == 1) && !DBR_IS_IMPL(rnum))) {
2320 			DBprintk(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
2321 				  rnum, dbreg.val, mode, i, count));
2322 
2323 			goto abort_mission;
2324 		}
2325 
2326 		/*
2327 		 * make sure we do not install enabled breakpoint
2328 		 */
2329 		if (rnum & 0x1) {
2330 			if (mode == 0)
2331 				dbreg.ibr.ibr_x = 0;
2332 			else
2333 				dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
2334 		}
2335 
2336 		/*
2337 		 * clear return flags and copy back to user
2338 		 *
2339 		 * XXX: fix once EAGAIN is implemented
2340 		 */
2341 		ret = -EFAULT;
2342 
2343 		PFM_REG_RETFLAG_SET(tmp.dbreg_flags, 0);
2344 
2345 		if (__copy_to_user(req, &tmp, sizeof(tmp))) goto abort_mission;
2346 
2347 		/*
2348 		 * Debug registers, just like PMC, can only be modified
2349 		 * by a kernel call. Moreover, perfmon() access to those
2350 		 * registers are centralized in this routine. The hardware
2351 		 * does not modify the value of these registers, therefore,
2352 		 * if we save them as they are written, we can avoid having
2353 		 * to save them on context switch out. This is made possible
2354 		 * by the fact that when perfmon uses debug registers, ptrace()
2355 		 * won't be able to modify them concurrently.
2356 		 */
2357 		if (mode == 0) {
2358 			CTX_USED_IBR(ctx, rnum);
2359 
2360 			ia64_set_ibr(rnum, dbreg.val);
2361 			ia64_srlz_i();
2362 
2363 			thread->ibr[rnum] = dbreg.val;
2364 
2365 			DBprintk(("write ibr%u=0x%lx used_ibrs=0x%lx\n", rnum, dbreg.val, ctx->ctx_used_ibrs[0]));
2366 		} else {
2367 			CTX_USED_DBR(ctx, rnum);
2368 
2369 			ia64_set_dbr(rnum, dbreg.val);
2370 			ia64_srlz_d();
2371 
2372 			thread->dbr[rnum] = dbreg.val;
2373 
2374 			DBprintk(("write dbr%u=0x%lx used_dbrs=0x%lx\n", rnum, dbreg.val, ctx->ctx_used_dbrs[0]));
2375 		}
2376 	}
2377 
2378 	return 0;
2379 
2380 abort_mission:
2381 	/*
2382 	 * in case it was our first attempt, we undo the global modifications
2383 	 */
2384 	if (first_time) {
2385 		LOCK_PFS();
2386 		if (ctx->ctx_fl_system) {
2387 			pfm_sessions.pfs_sys_use_dbregs--;
2388 		}
2389 		UNLOCK_PFS();
2390 		ctx->ctx_fl_using_dbreg = 0;
2391 	}
2392 	/*
2393 	 * install error return flag
2394 	 */
2395 	if (ret != -EFAULT) {
2396 		/*
2397 		 * XXX: for now we can only come here on EINVAL
2398 		 */
2399 		PFM_REG_RETFLAG_SET(tmp.dbreg_flags, PFM_REG_RETFL_EINVAL);
2400 		if (__put_user(tmp.dbreg_flags, &req->dbreg_flags)) ret = -EFAULT;
2401 	}
2402 	return ret;
2403 }
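
/*
 * Illustrative sketch (added comment, not kernel code): restricting
 * monitoring to an instruction address range from user level would go
 * through PFM_WRITE_IBRS with pfarg_dbreg_t entries. Names follow
 * perfmon.h of this era; addresses and masks are hypothetical. Note that
 * the routine above clears ibr.x on odd-numbered registers (and dbr.r,
 * dbr.w), so no architectural breakpoint can actually be armed this way:
 *
 *	pfarg_dbreg_t ibr[2];
 *
 *	memset(ibr, 0, sizeof(ibr));
 *	ibr[0].dbreg_num   = 0;			// even register: start address
 *	ibr[0].dbreg_value = code_start;
 *	ibr[1].dbreg_num   = 1;			// odd register: mask + plm bits
 *	ibr[1].dbreg_value = range_mask;
 *	perfmonctl(getpid(), PFM_WRITE_IBRS, ibr, 2);
 */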
2404 
2405 static int
2406 pfm_write_ibrs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2407 	 struct pt_regs *regs)
2408 {
2409 	/* we don't quite support this right now */
2410 	if (task != current) return -EINVAL;
2411 
2412 	if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2413 
2414 	return pfm_write_ibr_dbr(0, task, arg, count, regs);
2415 }
2416 
2417 static int
2418 pfm_write_dbrs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2419 	 struct pt_regs *regs)
2420 {
2421 	/* we don't quite support this right now */
2422 	if (task != current) return -EINVAL;
2423 
2424 	if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2425 
2426 	return pfm_write_ibr_dbr(1, task, arg, count, regs);
2427 }
2428 
2429 #endif /* PFM_PMU_USES_DBR */
2430 
2431 static int
2432 pfm_get_features(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
2433 {
2434 	pfarg_features_t tmp;
2435 
2436 	memset(&tmp, 0, sizeof(tmp));
2437 
2438 	tmp.ft_version      = PFM_VERSION;
2439 	tmp.ft_smpl_version = PFM_SMPL_VERSION;
2440 
2441 	if (__copy_to_user(arg, &tmp, sizeof(tmp))) return -EFAULT;
2442 
2443 	return 0;
2444 }
2445 
2446 static int
2447 pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2448 	  struct pt_regs *regs)
2449 {
2450 	/* we don't quite support this right now */
2451 	if (task != current) return -EINVAL;
2452 
2453 	/*
2454 	 * Cannot do anything before PMU is enabled
2455 	 */
2456 	if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
2457 
2458 	DBprintk(("[%d] fl_system=%d owner=%p current=%p\n",
2459 				current->pid,
2460 				ctx->ctx_fl_system, PMU_OWNER(),
2461 				current));
2462 
2463 	if (PMU_OWNER() != task) {
2464 		printk(KERN_DEBUG "perfmon: pfm_start task [%d] not pmu owner\n", task->pid);
2465 		return -EINVAL;
2466 	}
2467 
2468 	if (ctx->ctx_fl_system) {
2469 
2470 		PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
2471 
2472 		/* set user level psr.pp */
2473 		ia64_psr(regs)->pp = 1;
2474 
2475 		/* start monitoring at kernel level */
2476 		pfm_set_psr_pp();
2477 
2478 		/* enable dcr pp */
2479 		ia64_set_dcr(ia64_get_dcr()|IA64_DCR_PP);
2480 
2481 		ia64_srlz_i();
2482 
2483 	} else {
2484 		if ((task->thread.flags & IA64_THREAD_PM_VALID) == 0) {
2485 			printk(KERN_DEBUG "perfmon: pfm_start task flag not set for [%d]\n",
2486 			       task->pid);
2487 			return -EINVAL;
2488 		}
2489 		/* set user level psr.up */
2490 		ia64_psr(regs)->up = 1;
2491 
2492 		/* start monitoring at kernel level */
2493 		pfm_set_psr_up();
2494 
2495 		ia64_srlz_i();
2496 	}
2497 
2498 	return 0;
2499 }
2500 
2501 static int
2502 pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2503 	   struct pt_regs *regs)
2504 {
2505 	/* we don't quite support this right now */
2506 	if (task != current) {
2507 		DBprintk(("task [%d] != current [%d]\n", task->pid, current->pid));
2508 		return -EINVAL;
2509 	}
2510 
2511 #ifndef CONFIG_SMP
2512 	if (ctx->ctx_fl_system == 0 && PMU_OWNER()  && PMU_OWNER() != current)
2513 		pfm_lazy_save_regs(PMU_OWNER());
2514 #endif
2515 
2516 	/* reset all registers to stable quiet state */
2517 	pfm_reset_pmu(task);
2518 
2519 	/* make sure nothing starts */
2520 	if (ctx->ctx_fl_system) {
2521 		ia64_psr(regs)->pp = 0;
2522 		ia64_psr(regs)->up = 0; /* just to make sure! */
2523 
2524 		/* make sure monitoring is stopped */
2525 		pfm_clear_psr_pp();
2526 		ia64_srlz_i();
2527 
2528 		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
2529 		PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
2530 		if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
2531 	} else {
2532 		/*
2533 		 * needed in case the task was a passive task during
2534 		 * a system wide session and now wants to have its own
2535 		 * session
2536 		 */
2537 		ia64_psr(regs)->pp = 0; /* just to make sure! */
2538 		ia64_psr(regs)->up = 0;
2539 
2540 		/* make sure monitoring is stopped */
2541 		pfm_clear_psr_up();
2542 		ia64_srlz_i();
2543 
2544 		DBprintk(("clearing psr.sp for [%d]\n", current->pid));
2545 
2546 		/* allow user level control  */
2547 		ia64_psr(regs)->sp = 0;
2548 
2549 		/* PMU state will be saved/restored on ctxsw */
2550 		task->thread.flags |= IA64_THREAD_PM_VALID;
2551 	}
2552 
2553 	SET_PMU_OWNER(task);
2554 
2555 	ctx->ctx_flags.state = PFM_CTX_ENABLED;
2556 	SET_LAST_CPU(ctx, smp_processor_id());
2557 	INC_ACTIVATION();
2558 	SET_ACTIVATION(ctx);
2559 
2560 	/* simply unfreeze */
2561 	pfm_unfreeze_pmu();
2562 
2563 	return 0;
2564 }
2565 
2566 static int
2567 pfm_get_pmc_reset(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
2568 	   struct pt_regs *regs)
2569 {
2570 	pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
2571 	unsigned int cnum;
2572 	int i, ret = -EINVAL;
2573 
2574 	for (i = 0; i < count; i++, req++) {
2575 
2576 		if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
2577 
2578 		cnum = tmp.reg_num;
2579 
2580 		if (!PMC_IS_IMPL(cnum)) goto abort_mission;
2581 
2582 		tmp.reg_value = PMC_DFL_VAL(cnum);
2583 
2584 		PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
2585 
2586 		DBprintk(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, tmp.reg_value));
2587 
2588 		if (__copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
2589 	}
2590 	return 0;
2591 abort_mission:
2592 	PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
2593 	if (__copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT;
2594 
2595 	return ret;
2596 }
2597 
2598 /*
2599  * functions MUST be listed in increasing order of their index (see perfmon.h)
2600  */
2601 static pfm_cmd_desc_t pfm_cmd_tab[]={
2602 /* 0  */{ NULL, 0, 0, 0}, /* not used */
2603 /* 1  */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2604 /* 2  */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2605 /* 3  */{ pfm_read_pmds,PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2606 /* 4  */{ pfm_stop, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2607 /* 5  */{ pfm_start, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2608 /* 6  */{ pfm_enable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2609 /* 7  */{ pfm_disable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2610 /* 8  */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_RW, 1, sizeof(pfarg_context_t)},
2611 /* 9  */{ pfm_context_destroy, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2612 /* 10 */{ pfm_restart, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_NOCHK, 0, 0},
2613 /* 11 */{ pfm_protect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2614 /* 12 */{ pfm_get_features, PFM_CMD_ARG_RW, 0, 0},
2615 /* 13 */{ pfm_debug, 0, 1, sizeof(unsigned int)},
2616 /* 14 */{ pfm_context_unprotect, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
2617 /* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
2618 /* 16 */{ NULL, 0, 0, 0}, /* not used */
2619 /* 17 */{ NULL, 0, 0, 0}, /* not used */
2620 /* 18 */{ NULL, 0, 0, 0}, /* not used */
2621 /* 19 */{ NULL, 0, 0, 0}, /* not used */
2622 /* 20 */{ NULL, 0, 0, 0}, /* not used */
2623 /* 21 */{ NULL, 0, 0, 0}, /* not used */
2624 /* 22 */{ NULL, 0, 0, 0}, /* not used */
2625 /* 23 */{ NULL, 0, 0, 0}, /* not used */
2626 /* 24 */{ NULL, 0, 0, 0}, /* not used */
2627 /* 25 */{ NULL, 0, 0, 0}, /* not used */
2628 /* 26 */{ NULL, 0, 0, 0}, /* not used */
2629 /* 27 */{ NULL, 0, 0, 0}, /* not used */
2630 /* 28 */{ NULL, 0, 0, 0}, /* not used */
2631 /* 29 */{ NULL, 0, 0, 0}, /* not used */
2632 /* 30 */{ NULL, 0, 0, 0}, /* not used */
2633 /* 31 */{ NULL, 0, 0, 0}, /* not used */
2634 #ifdef PFM_PMU_USES_DBR
2635 /* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)},
2636 /* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}
2637 #endif
2638 };
2639 #define PFM_CMD_COUNT	(sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
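
/*
 * Illustrative sketch (added comment, not kernel code): a typical per-task
 * self-monitoring session drives the commands in the table above roughly
 * in this order (command names as expected in perfmon.h; argument setup
 * and error handling omitted):
 *
 *	pfarg_context_t ctx;
 *	memset(&ctx, 0, sizeof(ctx));
 *	perfmonctl(getpid(), PFM_CREATE_CONTEXT, &ctx, 1);
 *	perfmonctl(getpid(), PFM_WRITE_PMCS, pmcs, npmcs);
 *	perfmonctl(getpid(), PFM_WRITE_PMDS, pmds, npmds);
 *	perfmonctl(getpid(), PFM_ENABLE, NULL, 0);
 *	perfmonctl(getpid(), PFM_START, NULL, 0);
 *	... run the monitored workload ...
 *	perfmonctl(getpid(), PFM_STOP, NULL, 0);
 *	perfmonctl(getpid(), PFM_READ_PMDS, pmds, npmds);
 *	perfmonctl(getpid(), PFM_DESTROY_CONTEXT, NULL, 0);
 */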
2640 
2641 static int
2642 check_task_state(struct task_struct *task)
2643 {
2644 	int ret = 0;
2645 #ifdef CONFIG_SMP
2646 	/* We must wait until the state has been completely
2647 	 * saved. There can be situations where the reader arrives
2648 	 * after the task is marked as STOPPED but before pfm_save_regs()
2649 	 * is completed.
2650 	 */
2651 	for (;;) {
2652 
2653 		task_lock(task);
2654 		DBprintk((" [%d] state=%ld\n", task->pid, task->state));
2655 		if (!task_has_cpu(task)) break;
2656 		task_unlock(task);
2657 
2658 		do {
2659 			if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
2660 				DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
2661 				return -EBUSY;
2662 			}
2663 			barrier();
2664 			cpu_relax();
2665 		} while (task_has_cpu(task));
2666 	}
2667 	task_unlock(task);
2668 #else
2669 	if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
2670 		DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
2671 		ret = -EBUSY;
2672 	}
2673 #endif
2674 	return ret;
2675 }
2676 
2677 asmlinkage long
2678 sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6, long arg7,
2679 		long arg8, long stack)
2680 {
2681 	struct pt_regs *regs = (struct pt_regs *)&stack;
2682 	struct task_struct *task = current;
2683 	pfm_context_t *ctx;
2684 	size_t sz;
2685 	long ret;
2686 	int narg;
2687 
2688 	/*
2689 	 * reject any call if perfmon was disabled at initialization time
2690 	 */
2691 	if (PFM_IS_DISABLED()) return -ENOSYS;
2692 
2693 	DBprintk(("cmd=%d idx=%d valid=%d narg=0x%x\n", cmd, PFM_CMD_IDX(cmd),
2694 		  PFM_CMD_IS_VALID(cmd), PFM_CMD_NARG(cmd)));
2695 
2696 	if (PFM_CMD_IS_VALID(cmd) == 0) return -EINVAL;
2697 
2698 	/* ignore arguments when the command has none */
2699 	narg = PFM_CMD_NARG(cmd);
2700 	if ((narg == PFM_CMD_ARG_MANY  && count == 0) || (narg > 0 && narg != count)) return -EINVAL;
2701 
2702 	sz = PFM_CMD_ARG_SIZE(cmd);
2703 
2704 	if (PFM_CMD_READ_ARG(cmd) && !access_ok(VERIFY_READ, arg, sz*count)) return -EFAULT;
2705 
2706 	if (PFM_CMD_RW_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT;
2707 
2708 	if (PFM_CMD_USE_PID(cmd))  {
2709 		/*
2710 		 * XXX: may need to fine tune this one
2711 		 */
2712 		if (pid < 2) return -EPERM;
2713 
2714 		if (pid != current->pid) {
2715 
2716 			ret = -ESRCH;
2717 
2718 			read_lock(&tasklist_lock);
2719 
2720 			task = find_task_by_pid(pid);
2721 
2722 			if (!task) goto abort_call;
2723 
2724 			ret = -EPERM;
2725 
2726 			if (pfm_bad_permissions(task)) goto abort_call;
2727 
2728 			if (PFM_CMD_CHK(cmd)) {
2729 				ret = check_task_state(task);
2730 				if (ret != 0) {
2731 					DBprintk(("check_task_state=%ld for [%d]\n", ret, task->pid));
2732 					goto abort_call;
2733 				}
2734 			}
2735 		}
2736 	}
2737 
2738 	ctx = PFM_GET_CTX(task);
2739 
2740 	if (PFM_CMD_USE_CTX(cmd)) {
2741 		ret = -EINVAL;
2742 	       if (ctx == NULL) {
2743 			DBprintk(("no context for task %d\n", task->pid));
2744 			goto abort_call;
2745 	       }
2746 
2747 
2748 	       ret = -EPERM;
2749 	       /*
2750 		* we only grant access to the context if:
2751 		* 	- the caller is the creator of the context (ctx_owner)
2752 		*  OR   - the context is attached to the caller AND the context IS NOT
2753 		*  	  in protected mode
2754 		*/
2755 	       if (ctx->ctx_owner != current && (ctx->ctx_fl_protected || task != current)) {
2756 				DBprintk(("context protected, no access for [%d]\n", task->pid));
2757 				goto abort_call;
2758 	       }
2759 	}
2760 
2761 	ret = (*pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func)(task, ctx, arg, count, regs);
2762 
2763 abort_call:
2764 	if (task != current) read_unlock(&tasklist_lock);
2765 
2766 	return ret;
2767 }
2768 
2769 void asmlinkage
2770 pfm_ovfl_block_reset(u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
2771 		      u64 arg6, u64 arg7, long info)
2772 {
2773 	struct thread_struct *th = &current->thread;
2774 	pfm_context_t *ctx = current->thread.pfm_context;
2775 	int ret;
2776 
2777 	/*
2778 	 * clear the flag, to make sure we won't get here
2779 	 * again
2780 	 */
2781 	th->pfm_ovfl_block_reset = 0;
2782 
2783 	/*
2784 	 * do some sanity checks first
2785 	 */
2786 	if (!ctx) {
2787 		printk(KERN_DEBUG "perfmon: [%d] has no PFM context\n", current->pid);
2788 		return;
2789 	}
2790 
2791 	if (CTX_OVFL_NOBLOCK(ctx)) goto non_blocking;
2792 
2793 	DBprintk(("[%d] before sleeping\n", current->pid));
2794 
2795 	/*
2796 	 * may go through without blocking on SMP systems
2797 	 * if restart has been received already by the time we call down()
2798 	 */
2799 	ret = down_interruptible(&ctx->ctx_restart_sem);
2800 
2801 	DBprintk(("[%d] after sleeping ret=%d\n", current->pid, ret));
2802 
2803 	/*
2804 	 * in case of interruption of down() we don't restart anything
2805 	 */
2806 	if (ret >= 0) {
2807 
2808 non_blocking:
2809 		/* we reactivate on context switch */
2810 		ctx->ctx_fl_frozen = 0;
2811 		/*
2812 		 * the ovfl_sem is cleared by the restart task and this is safe because we always
2813 		 * use the local reference
2814 		 */
2815 
2816 		pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
2817 
2818 		ctx->ctx_ovfl_regs[0] = 0UL;
2819 
2820 		/*
2821 		 * Unlock sampling buffer and reset index atomically
2822 		 * XXX: not really needed when blocking
2823 		 */
2824 		if (CTX_HAS_SMPL(ctx)) {
2825 			ctx->ctx_psb->psb_hdr->hdr_count = 0;
2826 			ctx->ctx_psb->psb_index = 0;
2827 		}
2828 
2829 		pfm_unfreeze_pmu();
2830 
2831 		/* state restored, can go back to work (user mode) */
2832 	}
2833 }
2834 
2835 /*
2836  * This function will record an entry in the sampling buffer if it is not already full.
2837  * Input:
2838  * 	ovfl_mask: mask of overflowed PMD. MUST NEVER be 0.
2839  * Return:
2840  * 	0 : buffer is not full (did not BECOME full: still space or was already full)
2841  * 	1 : buffer is full (recorded the last entry)
2842  */
2843 static int
2844 pfm_record_sample(struct task_struct *task, pfm_context_t *ctx, unsigned long ovfl_mask, struct pt_regs *regs)
2845 {
2846 	pfm_smpl_buffer_desc_t *psb = ctx->ctx_psb;
2847 	unsigned long *e, m, idx;
2848 	perfmon_smpl_entry_t *h;
2849 	int j;
2850 
2851 	idx = ia64_fetch_and_add(1, &psb->psb_index);
2852 	DBprintk_ovfl(("recording index=%ld entries=%ld\n", idx-1, psb->psb_entries));
2853 
2854 	/*
2855 	 * XXX: there is a small chance that we could run out of index values before resetting
2856 	 * but index is unsigned long, so it will take some time.....
2857 	 * We use > instead of == because fetch_and_add() is off by one (see below)
2858 	 *
2859 	 * This case can happen in non-blocking mode or with multiple processes.
2860 	 * For non-blocking, we need to reload and continue.
2861 	 */
2862 	if (idx > psb->psb_entries) return 0;
2863 
2864 	/* first entry is really entry 0, not 1, because of the fetch_and_add */
2865 	idx--;
2866 
2867 	h = (perfmon_smpl_entry_t *)(((char *)psb->psb_addr) + idx*(psb->psb_entry_size));
2868 
2869 	/*
2870 	 * initialize entry header
2871 	 */
2872 	h->pid  = ctx->ctx_fl_system ? current->pid : task->pid;
2873 	h->cpu  = smp_processor_id();
2874 	h->last_reset_value = ovfl_mask ? ctx->ctx_soft_pmds[ffz(~ovfl_mask)].lval : 0UL;
2875 	h->ip   = regs ? regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3): 0x0UL;
2876 	h->regs = ovfl_mask; 			/* which registers overflowed */
2877 
2878 	/* guaranteed to monotonically increase on each cpu */
2879 	h->stamp  = pfm_get_stamp();
2880 
2881 	/* position for first pmd */
2882 	e = (unsigned long *)(h+1);
2883 
2884 	/*
2885 	 * selectively store PMDs in increasing index number
2886 	 */
2887 	m = ctx->ctx_smpl_regs[0];
2888 	for (j=0; m; m >>=1, j++) {
2889 
2890 		if ((m & 0x1) == 0) continue;
2891 
2892 		if (PMD_IS_COUNTING(j)) {
2893 			*e  =  pfm_read_soft_counter(ctx, j);
2894 		} else {
2895 			*e = ia64_get_pmd(j); /* slow */
2896 		}
2897 		DBprintk_ovfl(("e=%p pmd%d =0x%lx\n", (void *)e, j, *e));
2898 		e++;
2899 	}
2900 	pfm_stats[smp_processor_id()].pfm_recorded_samples_count++;
2901 
2902 	/*
2903 	 * make the new entry visible to user, needs to be atomic
2904 	 */
2905 	ia64_fetch_and_add(1, &psb->psb_hdr->hdr_count);
2906 
2907 	DBprintk_ovfl(("index=%ld entries=%ld hdr_count=%ld\n",
2908 				idx, psb->psb_entries, psb->psb_hdr->hdr_count));
2909 	/*
2910 	 * sampling buffer full ?
2911 	 */
2912 	if (idx == (psb->psb_entries-1)) {
2913 		DBprintk_ovfl(("sampling buffer full\n"));
2914 		/*
2915 		 * XXX: must reset buffer in blocking mode and lost notified
2916 		 */
2917 		pfm_stats[smp_processor_id()].pfm_full_smpl_buffer_count++;
2918 		return 1;
2919 	}
2920 	return 0;
2921 }
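
/*
 * Layout note (added comment): each record emitted by pfm_record_sample()
 * occupies psb_entry_size bytes in the user-visible sampling buffer: a
 * perfmon_smpl_entry_t header (pid, cpu, last_reset_value, ip, overflowed
 * register mask, timestamp) immediately followed by one 64-bit slot per
 * PMD selected in ctx_smpl_regs, stored in increasing register index order.
 * hdr_count in the buffer header tells user level how many such records
 * are currently valid.
 */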
2922 
2923 /*
2924  * main overflow processing routine.
2925  * it can be called from the interrupt path or explicitly from the context switch code
2926  * Return:
2927  *	new value of pmc[0]. if 0x0 then unfreeze, else keep frozen
2928  */
2929 static unsigned long
2930 pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
2931 {
2932 	unsigned long mask;
2933 	struct thread_struct *t;
2934 	unsigned long old_val;
2935 	unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL;
2936 	int i;
2937 	int ret = 1;
2938 	/*
2939 	 * It is never safe to access the task for which the overflow interrupt is destined
2940 	 * using the current variable as the interrupt may occur in the middle of a context switch
2941 	 * where current does not hold the task that is running yet.
2942 	 *
2943 	 * For monitoring, however, we do need to get access to the task which caused the overflow
2944 	 * to account for overflow on the counters.
2945 	 *
2946 	 * We accomplish this by maintaining a current owner of the PMU per CPU. During context
2947 	 * switch the ownership is changed in a way such that the reflected owner is always the
2948 	 * valid one, i.e. the one that caused the interrupt.
2949 	 */
2950 
2951 	t   = &task->thread;
2952 
2953 	/*
2954 	 * XXX: debug test
2955 	 * Don't think this could happen given upfront tests
2956 	 */
2957 	if ((t->flags & IA64_THREAD_PM_VALID) == 0 && ctx->ctx_fl_system == 0) {
2958 		printk(KERN_DEBUG "perfmon: Spurious overflow interrupt: process %d not "
2959 		       "using perfmon\n", task->pid);
2960 		return 0x1;
2961 	}
2962 	/*
2963 	 * sanity test. Should never happen
2964 	 */
2965 	if ((pmc0 & 0x1) == 0) {
2966 		printk(KERN_DEBUG "perfmon: pid %d pmc0=0x%lx assumption error for freeze bit\n",
2967 		       task->pid, pmc0);
2968 		return 0x0;
2969 	}
2970 
2971 	mask = pmc0 >> PMU_FIRST_COUNTER;
2972 
2973 	DBprintk_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s"
2974 		  " mode used_pmds=0x%lx used_pmcs=0x%lx reload_pmcs=0x%lx\n",
2975 			pmc0, task->pid, (regs ? regs->cr_iip : 0),
2976 			CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
2977 			ctx->ctx_used_pmds[0],
2978 			ctx->ctx_used_pmcs[0],
2979 			ctx->ctx_reload_pmcs[0]));
2980 
2981 	/*
2982 	 * First we update the virtual counters
2983 	 */
2984 	for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
2985 
2986 		/* skip pmd which did not overflow */
2987 		if ((mask & 0x1) == 0) continue;
2988 
2989 		DBprintk_ovfl(("pmd[%d] overflowed hw_pmd=0x%lx soft_pmd=0x%lx\n",
2990 			  i, ia64_get_pmd(i), ctx->ctx_soft_pmds[i].val));
2991 
2992 		/*
2993 		 * Note that the pmd is not necessarily 0 at this point as qualified events
2994 		 * may have happened before the PMU was frozen. The residual count is not
2995 		 * taken into consideration here but will be with any read of the pmd via
2996 		 * pfm_read_pmds().
2997 		 */
2998 		old_val                    = ctx->ctx_soft_pmds[i].val;
2999 		ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val;
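		/*
		 * Added note: ovfl_val is the mask of implemented counter bits,
		 * so 1 + ovfl_val equals 2^W for a W-bit hardware counter. Each
		 * hardware wrap therefore adds 2^W to the 64-bit software count,
		 * and the old_val > new-value test below only fires when the
		 * software count itself wraps around 64 bits, which is how a
		 * counter typically initialized to (2^64 - period) signals the
		 * end of its programmed period.
		 */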
3000 
3001 		/*
3002 		 * check for overflow condition
3003 		 */
3004 		if (old_val > ctx->ctx_soft_pmds[i].val) {
3005 
3006 			ovfl_pmds |= 1UL << i;
3007 
3008 			if (PMC_OVFL_NOTIFY(ctx, i)) {
3009 				ovfl_notify |= 1UL << i;
3010 			}
3011 		} else {
3012 			/*
3013 			 * clear top bits (maintain counts in lower part, may not always be zero)
3014 			 */
3015 			ia64_set_pmd(i, ia64_get_pmd(i) & pmu_conf.ovfl_val);
3016 		}
3017 		DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
3018 			  i, ctx->ctx_soft_pmds[i].val, old_val,
3019 			  ia64_get_pmd(i) & pmu_conf.ovfl_val, ovfl_pmds, ovfl_notify));
3020 	}
3021 
3022 	/*
3023 	 * check for sampling buffer
3024 	 *
3025 	 * if present, record sample only when a 64-bit counter has overflowed.
3026 	 * We propagate notification ONLY when buffer becomes full.
3027 	 */
3028 	if(CTX_HAS_SMPL(ctx) && ovfl_pmds) {
3029 		ret = pfm_record_sample(task, ctx, ovfl_pmds, regs);
3030 		if (ret == 1) {
3031 			/*
3032 			 * Sampling buffer became full
3033 			 * If no notification was requested, then we reset the buffer index
3034 			 * and reset registers (done below) and resume.
3035 			 * If notification requested, then defer reset until pfm_restart()
3036 			 */
3037 			if (ovfl_notify == 0UL) {
3038 				ctx->ctx_psb->psb_hdr->hdr_count = 0UL;
3039 				ctx->ctx_psb->psb_index		 = 0UL;
3040 			}
3041 		} else {
3042 			/*
3043 			 * sample recorded in buffer, no need to notify user
3044 			 */
3045 			ovfl_notify = 0UL;
3046 		}
3047 	}
3048 
3049 	/*
3050 	 * No overflow requiring a user level notification
3051 	 */
3052 	if (ovfl_notify == 0UL) {
3053 		if (ovfl_pmds)
3054 			pfm_reset_regs(ctx, &ovfl_pmds, PFM_PMD_SHORT_RESET);
3055 		return 0x0;
3056 	}
3057 
3058 	/*
3059 	 * keep track of what to reset when unblocking
3060 	 */
3061 	ctx->ctx_ovfl_regs[0]  = ovfl_pmds;
3062 
3063 	/*
3064 	 * As a consequence of the overflow, we always resume
3065 	 * with monitoring turned off. pfm_restart() will
3066 	 * reactivate.
3067 	 */
3068 	ctx->ctx_fl_frozen = 1;
3069 
3070 	/*
3071 	 * we have come to this point because there was an overflow and that notification
3072 	 * was requested. The notify_task may have disappeared, in which case notify_task
3073 	 * is NULL.
3074 	 */
3075 	LOCK_CTX(ctx);
3076 
3077 	if (ctx->ctx_notify_task) {
3078 		if (CTX_OVFL_NOBLOCK(ctx) == 0 && ctx->ctx_notify_task != task) {
3079 			t->pfm_ovfl_block_reset = 1; /* will cause blocking */
3080 		} else {
3081 			t->pfm_ovfl_block_reset = 0;
3082 		}
3083 
3084 		DBprintk_ovfl(("[%d] scheduling tasklet\n", current->pid));
3085 
3086 		/*
3087 		 * the tasklet is responsible for sending the notification
3088 		 * not the PMU owner nor the current task.
3089 		 */
3090 		tasklet_schedule(&ctx->ctx_tasklet);
3091 
3092 	} else {
3093 		DBprintk_ovfl(("notification task has disappeared !\n"));
3094 		t->pfm_ovfl_block_reset = 0;
3095 	}
3096 
3097 	UNLOCK_CTX(ctx);
3098 
3099 	DBprintk_ovfl(("return pmc0=0x%x must_block=%ld\n",
3100 				ctx->ctx_fl_frozen ? 0x1 : 0x0, t->pfm_ovfl_block_reset));
3101 
3102 	return ctx->ctx_fl_frozen ? 0x1 : 0x0;
3103 }
3104 
3105 static void
3106 pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
3107 {
3108 	u64 pmc0;
3109 	struct task_struct *task;
3110 	pfm_context_t *ctx;
3111 
3112 	pfm_stats[smp_processor_id()].pfm_ovfl_intr_count++;
3113 
3114 	/*
3115 	 * if an alternate handler is registered, just bypass the default one
3116 	 */
3117 	if (pfm_alternate_intr_handler) {
3118 		(*pfm_alternate_intr_handler->handler)(irq, arg, regs);
3119 		return;
3120 	}
3121 
3122 	/*
3123 	 * srlz.d done before arriving here
3124 	 *
3125 	 * This is slow
3126 	 */
3127 	pmc0 = ia64_get_pmc(0);
3128 	task = PMU_OWNER();
3129 	/*
3130 	 * if we have some pending bits set
3131 	 * assumes: if any PMC[0].bit[63-1] is set, then PMC[0].fr = 1
3132 	 */
3133 	if (PMC0_HAS_OVFL(pmc0) && task) {
3134 		/*
3135 		 * we assume that pmc0.fr is always set here
3136 		 */
3137 		ctx = PFM_GET_CTX(task);
3138 
3139 		/* sanity check */
3140 		if (!ctx) {
3141 			printk(KERN_DEBUG "perfmon: Spurious overflow interrupt: process %d has "
3142 			       "no PFM context\n", task->pid);
3143 			return;
3144 		}
3145 		/*
3146 		 * assume PMC[0].fr = 1 at this point
3147 		 */
3148 		pmc0 = pfm_overflow_handler(task, ctx, pmc0, regs);
3149 
3150 		/*
3151 		 * we can only update pmc0 when the overflow
3152 		 * is for the current context or we are in system
3153 		 * wide mode. In UP (per-task) the current
3154 		 * task may not be the one owning the PMU,
3155 		 * same thing for system-wide.
3156 		 */
3157 		if (task == current || ctx->ctx_fl_system) {
3158 			/*
3159 		 	 * We always clear the overflow status bits and either unfreeze
3160 		 	 * or keep the PMU frozen.
3161 		 	 */
3162 			ia64_set_pmc(0, pmc0);
3163 			ia64_srlz_d();
3164 		} else {
3165 			task->thread.pmc[0] = pmc0;
3166 		}
3167 	} else {
3168 		pfm_stats[smp_processor_id()].pfm_spurious_ovfl_intr_count++;
3169 	}
3170 }
3171 
3172 #define PFM_PROC_SHOW_HEADER	((void *)NR_CPUS+1)
3173 
3174 static void *
3175 pfm_proc_start(struct seq_file *m, loff_t *pos)
3176 {
3177 	if (*pos == 0) {
3178 		return PFM_PROC_SHOW_HEADER;
3179 	}
3180 
3181 	while (*pos <= NR_CPUS) {
3182 		if (cpu_online(*pos - 1)) {
3183 			return (void *)*pos;
3184 		}
3185 		++*pos;
3186 	}
3187 	return NULL;
3188 }
3189 
3190 static void *
3191 pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
3192 {
3193 	++*pos;
3194 	return pfm_proc_start(m, pos);
3195 }
3196 
3197 static void
3198 pfm_proc_stop(struct seq_file *m, void *v)
3199 {
3200 }
3201 
3202 static void
3203 pfm_proc_show_header(struct seq_file *m)
3204 {
3205  	seq_printf(m,
3206 		"perfmon version           : %u.%u\n"
3207 		"fastctxsw                 : %s\n"
3208 		"ovfl_mask                 : 0x%lx\n",
3209 		PFM_VERSION_MAJ, PFM_VERSION_MIN,
3210 		pfm_sysctl.fastctxsw > 0 ? "Yes": "No",
3211 		pmu_conf.ovfl_val);
3212 
3213   	LOCK_PFS();
3214 
3215  	seq_printf(m,
3216  		"proc_sessions             : %u\n"
3217  		"sys_sessions              : %u\n"
3218  		"sys_use_dbregs            : %u\n"
3219  		"ptrace_use_dbregs         : %u\n",
3220  		pfm_sessions.pfs_task_sessions,
3221  		pfm_sessions.pfs_sys_sessions,
3222  		pfm_sessions.pfs_sys_use_dbregs,
3223  		pfm_sessions.pfs_ptrace_use_dbregs);
3224 
3225   	UNLOCK_PFS();
3226 }
3227 
3228 static int
3229 pfm_proc_show(struct seq_file *m, void *v)
3230 {
3231 	int cpu;
3232 
3233 	if (v == PFM_PROC_SHOW_HEADER) {
3234 		pfm_proc_show_header(m);
3235 		return 0;
3236 	}
3237 
3238 	/* show info for CPU (v - 1) */
3239 
3240 	cpu = (long)v - 1;
3241 	seq_printf(m,
3242 		"CPU%-2d overflow intrs      : %lu\n"
3243 		"CPU%-2d spurious intrs      : %lu\n"
3244 		"CPU%-2d recorded samples    : %lu\n"
3245 		"CPU%-2d smpl buffer full    : %lu\n"
3246 		"CPU%-2d syst_wide           : %d\n"
3247 		"CPU%-2d dcr_pp              : %d\n"
3248 		"CPU%-2d exclude idle        : %d\n"
3249 		"CPU%-2d owner               : %d\n"
3250 		"CPU%-2d activations         : %lu\n",
3251 		cpu, pfm_stats[cpu].pfm_ovfl_intr_count,
3252 		cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count,
3253 		cpu, pfm_stats[cpu].pfm_recorded_samples_count,
3254 		cpu, pfm_stats[cpu].pfm_full_smpl_buffer_count,
3255 		cpu, cpu_data(cpu)->pfm_syst_info & PFM_CPUINFO_SYST_WIDE ? 1 : 0,
3256 		cpu, cpu_data(cpu)->pfm_syst_info & PFM_CPUINFO_DCR_PP ? 1 : 0,
3257 		cpu, cpu_data(cpu)->pfm_syst_info & PFM_CPUINFO_EXCL_IDLE ? 1 : 0,
3258 		cpu, pmu_owners[cpu].owner ? pmu_owners[cpu].owner->pid: -1,
3259 		cpu, pmu_owners[cpu].activation_number);
3260 
3261 	return 0;
3262 }
3263 
3264 struct seq_operations pfm_seq_ops = {
3265 	.start =	pfm_proc_start,
3266  	.next =		pfm_proc_next,
3267  	.stop =		pfm_proc_stop,
3268  	.show =		pfm_proc_show
3269 };
3270 
3271 static int
3272 pfm_proc_open(struct inode *inode, struct file *file)
3273 {
3274 	return seq_open(file, &pfm_seq_ops);
3275 }
3276 
3277 /*
3278  * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
3279  * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
3280  * is active or inactive based on mode. We must rely on the value in
3281  * local_cpu_data->pfm_syst_info
3282  */
3283 void
3284 pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
3285 {
3286 	struct pt_regs *regs;
3287 	unsigned long dcr;
3288 	unsigned long dcr_pp;
3289 
3290 	dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
3291 
3292 	/*
3293 	 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
3294 	 * on every CPU, so we can rely on the pid to identify the idle task.
3295 	 */
3296 	if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
3297 		regs = (struct pt_regs *)((unsigned long) task + IA64_STK_OFFSET);
3298 		regs--;
3299 		ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
3300 		return;
3301 	}
3302 	/*
3303 	 * we are the idle task  and there is exclusion.
3304 	 *
3305 	 * if monitoring has started
3306 	 */
3307 	if (dcr_pp) {
3308 		dcr = ia64_get_dcr();
3309 		/*
3310 		 * context switching in?
3311 		 */
3312 		if (is_ctxswin) {
3313 			/* mask monitoring for the idle task */
3314 			ia64_set_dcr(dcr & ~IA64_DCR_PP);
3315 			pfm_clear_psr_pp();
3316 			ia64_srlz_i();
3317 			return;
3318 		}
3319 		/*
3320 		 * context switching out
3321 		 * restore normal kernel level settings
3322 		 *
3323 		 * Due to inlining this odd if-then-else construction generates
3324 		 * better code.
3325 	         */
3326 		ia64_set_dcr(dcr |IA64_DCR_PP);
3327 		pfm_set_psr_pp();
3328 		ia64_srlz_i();
3329 	}
3330 }
3331 
3332 #ifdef CONFIG_SMP
3333 void
3334 pfm_save_regs(struct task_struct *task)
3335 {
3336 	pfm_context_t *ctx;
3337 	struct thread_struct *t;
3338 	u64 psr;
3339 
3340 	ctx = PFM_GET_CTX(task);
3341 	if (ctx == NULL) goto save_error;
3342 	t = &task->thread;
3343 
3344 	/*
3345 	 * sanity check
3346 	 */
3347 	if (ctx->ctx_last_activation != GET_ACTIVATION()) {
3348 		DBprintk(("ctx_activation=%lu activation=%lu: no save\n",
3349 			ctx->ctx_last_activation, GET_ACTIVATION()));
3350 		return;
3351 	}
3352 
3353 	/*
3354 	 * save current PSR: needed because we modify it
3355 	 */
3356 	psr = pfm_get_psr();
3357 
3358 	/*
3359 	 * stop monitoring:
3360 	 * This is the last instruction which may generate an overflow
3361 	 *
3362 	 * We do not need to set psr.sp because it is irrelevant in the kernel.
3363 	 * It will be restored from ipsr when going back to user level
3364 	 */
3365 	pfm_clear_psr_up();
3366 
3367 	/*
3368 	 * keep a copy of the saved psr (for reload)
3369 	 */
3370 	ctx->ctx_saved_psr = psr;
3371 
3372 	/*
3373 	 * release ownership of this PMU.
3374 	 */
3375 	SET_PMU_OWNER(NULL);
3376 
3377 	/*
3378 	 * we systematically save the PMDs as we have no
3379 	 * guarantee we will be scheduled on that same
3380 	 * CPU again.
3381 	 */
3382 	pfm_save_pmds(t->pmd, ctx->ctx_used_pmds[0]);
3383 
3384 	/*
3385 	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
3386 	 * we will need it on the restore path to check
3387 	 * for pending overflow.
3388 	 */
3389 	t->pmc[0] = ia64_get_pmc(0);
3390 
3391 	return;
3392 
3393 save_error:
3394 	printk(KERN_ERR "perfmon: pfm_save_regs CPU%d [%d] NULL context PM_VALID=%ld\n",
3395 		smp_processor_id(), task->pid,
3396 		task->thread.flags & IA64_THREAD_PM_VALID);
3397 }
3398 
3399 #else /* !CONFIG_SMP */
3400 
3401 void
3402 pfm_save_regs(struct task_struct *task)
3403 {
3404 	pfm_context_t *ctx;
3405 	u64 psr;
3406 
3407 	ctx = PFM_GET_CTX(task);
3408 	if (ctx == NULL) goto save_error;
3409 	/*
3410 	 * save current PSR: needed because we modify it
3411 	 */
3412 	psr = pfm_get_psr();
3413 
3414 	/*
3415 	 * stop monitoring:
3416 	 * This is the last instruction which may generate an overflow
3417 	 *
3418 	 * We do not need to set psr.sp because it is irrelevant in the kernel.
3419 	 * It will be restored from ipsr when going back to user level
3420 	 */
3421 	pfm_clear_psr_up();
3422 
3423 	/*
3424 	 * keep a copy of the saved psr (for reload)
3425 	 */
3426 	ctx->ctx_saved_psr = psr;
3427 
3428 	return;
3429 save_error:
3430 	printk(KERN_ERR "perfmon: pfm_save_regs CPU%d [%d] NULL context PM_VALID=%ld\n",
3431 		smp_processor_id(), task->pid,
3432 		task->thread.flags & IA64_THREAD_PM_VALID);
3433 }
3434 
3435 static unsigned long
3436 pfm_lazy_save_regs (struct task_struct *task)
3437 {
3438 	pfm_context_t *ctx;
3439 	struct thread_struct *t;
3440 
3441 	ctx = PFM_GET_CTX(task);
3442 	t   = &task->thread;
3443 
3444 	DBprintk(("on [%d] used_pmds=0x%lx\n", task->pid, ctx->ctx_used_pmds[0]));
3445 
3446 	/*
3447 	 * release ownership of this PMU.
3448 	 * must be done before we save the registers.
3449 	 *
3450 	 * after this call any PMU interrupt is treated
3451 	 * as spurious.
3452 	 */
3453 	SET_PMU_OWNER(NULL);
3454 
3455 	/*
3456 	 * save all the pmds we use
3457 	 */
3458 	pfm_save_pmds(t->pmd, ctx->ctx_used_pmds[0]);
3459 
3460 	/*
3461 	 * save pmc0 (ia64_srlz_d() already done in pfm_save_pmds());
3462 	 * it is needed to check for pending overflow
3463 	 * on the restore path.
3464 	 */
3465 	t->pmc[0] = ia64_get_pmc(0);
3466 
3467 	return t->pmc[0];
3468 }
3469 #endif /* CONFIG_SMP */
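
/*
 * Note on the two save strategies above (clarifying summary, not from the
 * original sources):
 *
 * SMP: pfm_save_regs() eagerly flushes the used PMDs and pmc0 into the
 *      thread structure at every context switch out, because the task may
 *      be rescheduled on another CPU where the hardware state would not be
 *      available.
 *
 * UP:  pfm_save_regs() only records the psr and stops monitoring; the
 *      PMD/pmc0 state stays in the PMU and is flushed lazily by
 *      pfm_lazy_save_regs(), and only when another task needs the PMU
 *      (see pfm_load_regs() below).
 */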
3470 
3471 #ifdef CONFIG_SMP
3472 void
3473 pfm_load_regs (struct task_struct *task)
3474 {
3475 	pfm_context_t *ctx;
3476 	struct thread_struct *t;
3477 	struct task_struct *owner;
3478 	unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
3479 	u64 psr;
3480 
3481 	ctx = PFM_GET_CTX(task);
3482 	if (unlikely(ctx == NULL)) {
3483 		printk(KERN_ERR "perfmon: pfm_load_regs() null context\n");
3484 		return;
3485 	}
3486 
3487 	owner = PMU_OWNER();
3488 	t     = &task->thread;
3489 
3490 	/*
3491 	 * possible on unload
3492 	 */
3493 	if ((t->flags & IA64_THREAD_PM_VALID) == 0) {
3494 		DBprintk(("[%d] PM_VALID=0, nothing to do\n", task->pid));
3495 		return;
3496 	}
3497 
3498 	/*
3499 	 * we restore ALL the debug registers to avoid picking up
3500 	 * stale state.
3501 	 *
3502 	 * This must be done even when the task is still the owner
3503 	 * as the registers may have been modified via ptrace()
3504 	 * (not perfmon) by the previous task.
3505 	 */
3506 	if (ctx->ctx_fl_using_dbreg) {
3507 		pfm_restore_ibrs(t->ibr, pmu_conf.num_ibrs);
3508 		pfm_restore_dbrs(t->dbr, pmu_conf.num_dbrs);
3509 	}
3510 
3511 	/*
3512 	 * retrieve saved psr
3513 	 */
3514 	psr = ctx->ctx_saved_psr;
3515 
3516 	/*
3517 	 * if we were the last user of the PMU on that CPU,
3518 	 * then there is nothing to do except restore psr
3519 	 */
3520 	if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {
3521 		/*
3522 		 * retrieve partial reload masks (due to user modifications)
3523 		 */
3524 		pmc_mask = 0UL;
3525 		pmd_mask = 0UL;
3526 
3527 		if (pmc_mask || pmd_mask) DBprintk(("partial reload [%d] pmd_mask=0x%lx pmc_mask=0x%lx\n", task->pid, pmd_mask, pmc_mask));
3528 	} else {
3529 		/*
3530 	 	 * To avoid leaking information to the user level when psr.sp=0,
3531 	 	 * we must reload ALL implemented pmds (even the ones we don't use).
3532 	 	 * In the kernel we only allow PFM_READ_PMDS on registers which
3533 	 	 * we initialized or requested (sampling) so there is no risk there.
3534 	 	 */
3535 		pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_reload_pmds[0];
3536 
3537 		/*
3538 	 	 * ALL accessible PMCs are systematically reloaded, unused registers
3539 	 	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
3540 	 	 * up stale configuration.
3541 	 	 *
3542 	 	 * PMC0 is never in the mask. It is always restored separately.
3543 	 	 */
3544 		pmc_mask = ctx->ctx_reload_pmcs[0];
3545 
3546 		DBprintk(("full reload for [%d] owner=%d activation=%lu last_activation=%lu last_cpu=%d pmd_mask=0x%lx pmc_mask=0x%lx\n",
3547 			task->pid, owner ? owner->pid : -1,
3548 			GET_ACTIVATION(), ctx->ctx_last_activation,
3549 			GET_LAST_CPU(ctx), pmd_mask, pmc_mask));
3550 
3551 	}
3552 
3553 	if (pmd_mask) pfm_restore_pmds(t->pmd, pmd_mask);
3554 	if (pmc_mask) pfm_restore_pmcs(t->pmc, pmc_mask);
3555 
3556 	/*
3557 	 * check for pending overflow at the time the state
3558 	 * was saved.
3559 	 */
3560 	if (PMC0_HAS_OVFL(t->pmc[0])) {
3561 		struct pt_regs *regs = TASK_PTREGS(task);
3562 		pfm_overflow_handler(task, ctx, t->pmc[0], regs);
3563 	}
3564 
3565 	/*
3566 	 * fl_frozen==1 when we are in blocking mode waiting for restart
3567 	 */
3568 	if (ctx->ctx_fl_frozen == 0) {
3569 		pfm_unfreeze_pmu();
3570 	}
3571 
3572 	SET_LAST_CPU(ctx, smp_processor_id());
3573 
3574 	/*
3575 	 * bump the activation value for this PMU
3576 	 */
3577 	INC_ACTIVATION();
3578 	/*
3579 	 * record current activation for this context
3580 	 */
3581 	SET_ACTIVATION(ctx);
3582 
3583 	/*
3584 	 * establish new ownership. Interrupts
3585 	 * are still masked at this point.
3586 	 */
3587 	SET_PMU_OWNER(task);
3588 
3589 	/*
3590 	 * restore the psr we changed
3591 	 */
3592 	pfm_set_psr_l(psr);
3593 
3594 }
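
/*
 * Illustrative example of the activation check above (the numbers are made
 * up): assume task T was switched out on CPU2 when that CPU's activation
 * counter was 41, so GET_LAST_CPU(ctx) == 2 and ctx_last_activation == 41.
 * If T is rescheduled on CPU2 and no other perfmon context has been loaded
 * there in the meantime, GET_ACTIVATION() still returns 41 and the cheap
 * path is taken (only psr is restored). If any other context was loaded on
 * CPU2, INC_ACTIVATION() pushed the counter past 41 and a full PMD/PMC
 * reload is performed.
 */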
3595 #else /*  !CONFIG_SMP */
3596 /*
3597  * reload PMU state for UP kernels
3598  */
3599 void
3600 pfm_load_regs (struct task_struct *task)
3601 {
3602 	struct thread_struct *t;
3603 	pfm_context_t *ctx;
3604 	struct task_struct *owner;
3605 	unsigned long pmd_mask, pmc_mask;
3606 	unsigned long prev_pmc0 = ~0UL;
3607 	u64 psr;
3608 
3609 	owner      = PMU_OWNER();
3610 	ctx        = PFM_GET_CTX(task);
3611 	t          = &task->thread;
3612 
3613 	/*
3614 	 * we restore ALL the debug registers to avoid picking up
3615 	 * stale state.
3616 	 *
3617 	 * This must be done even when the task is still the owner
3618 	 * as the registers may have been modified via ptrace()
3619 	 * (not perfmon) by the previous task.
3620 	 */
3621 	if (ctx->ctx_fl_using_dbreg) {
3622 		pfm_restore_ibrs(t->ibr, pmu_conf.num_ibrs);
3623 		pfm_restore_dbrs(t->dbr, pmu_conf.num_dbrs);
3624 	}
3625 
3626 	/*
3627 	 * retrieve saved psr
3628 	 */
3629 	psr = ctx->ctx_saved_psr;
3630 
3631 	/*
3632 	 * short path, our state is still there, just
3633 	 * need to restore psr and we go
3634 	 *
3635 	 * we touch neither the PMCs nor the PMDs. The psr is not touched
3636 	 * by the overflow_handler, so we are safe w.r.t. interrupt
3637 	 * concurrency even without interrupt masking.
3638 	 */
3639 	if (owner == task) {
3640 		pfm_set_psr_l(psr);
3641 		return;
3642 	}
3643 
3644 	DBprintk(("reload for [%d] owner=%d\n", task->pid, owner ? owner->pid : -1));
3645 
3646 	/*
3647 	 * someone else is still using the PMU, first push it out and
3648 	 * then we'll be able to install our stuff!
3649 	 *
3650 	 * Upon return, there will be no owner for the current PMU
3651 	 */
3652 	if (owner) prev_pmc0 = pfm_lazy_save_regs(owner);
3653 	/*
3654 	 * To avoid leaking information to the user level when psr.sp=0,
3655 	 * we must reload ALL implemented pmds (even the ones we don't use).
3656 	 * In the kernel we only allow PFM_READ_PMDS on registers which
3657 	 * we initialized or requested (sampling) so there is no risk there.
3658 	 */
3659 	pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_reload_pmds[0];
3660 
3661 	/*
3662 	 * ALL accessible PMCs are systematically reloaded, unused registers
3663 	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
3664 	 * up stale configuration.
3665 	 *
3666 	 * PMC0 is never in the mask. It is always restored separately.
3667 	 */
3668 	pmc_mask = ctx->ctx_reload_pmcs[0];
3669 
3670 	pfm_restore_pmds(t->pmd, pmd_mask);
3671 	pfm_restore_pmcs(t->pmc, pmc_mask);
3672 
3673 	/*
3674 	 * Check for pending overflow at the time the state was last saved;
3675 	 * the handler is invoked if any overflow status bits are set.
3676 	 *
3677 	 * Any PMU overflow in flight at this point, will still
3678 	 * be treated as spurious because we have no declared
3679 	 * owner. Note that the first level interrupt handler
3680 	 * DOES NOT TOUCH any PMC except PMC0 for which we have
3681 	 * a copy already.
3682 	 */
3683 	if (PMC0_HAS_OVFL(t->pmc[0])) {
3684 		struct pt_regs *regs = TASK_PTREGS(task);
3685 		pfm_overflow_handler(task, ctx, t->pmc[0], regs);
3686 	}
3687 
3688 
3689 
3690 	/*
3691 	 * fl_frozen==1 when we are in blocking mode waiting for restart
3692 	 */
3693 	if (ctx->ctx_fl_frozen == 0) {
3694 		pfm_unfreeze_pmu();
3695 	} else if (prev_pmc0 == 0UL && ctx->ctx_fl_frozen) {
3696 		/*
3697 		 * owner is still NULL at this point.
3698 		 *
3699 		 * if the previous owner (from lazy_save_regs())
3700 		 * was not in frozen state, then we need to freeze
3701 		 * the PMU if the new context is frozen.
3702 		 *
3703 		 * on McKinley this will generate a spurious interrupt
3704 		 * but we have no other way.
3705 		 */
3706 		pfm_freeze_pmu();
3707 	}
3708 
3709 	/*
3710 	 * establish new ownership. If there was an in-flight
3711 	 * overflow interrupt, it will be treated as spurious
3712 	 * before and after the call, because no overflow
3713 	 * status bit can possibly be set. No new overflow
3714 	 * can be generated because, at this point, psr.up
3715 	 * is still cleared.
3716 	 */
3717 	SET_PMU_OWNER(task);
3718 
3719 	/*
3720 	 * restore the psr. This is the point at which
3721 	 * new overflow interrupts can be generated again.
3722 	 */
3723 	pfm_set_psr_l(psr);
3724 }
3725 #endif /* CONFIG_SMP */
3726 
3727 /*
3728  * XXX: make this routine able to work with a non-current context
3729  */
3730 static void
3731 pfm_reset_pmu(struct task_struct *task)
3732 {
3733 	struct thread_struct *t = &task->thread;
3734 	pfm_context_t *ctx = t->pfm_context;
3735 	int i;
3736 
3737 	if (task != current) {
3738 		printk("perfmon: invalid task in pfm_reset_pmu()\n");
3739 		return;
3740 	}
3741 
3742 	/* Let's make sure the PMU is frozen */
3743 	pfm_freeze_pmu();
3744 
3745 	/*
3746 	 * install reset values for PMC. We skip PMC0 (done above)
3747 	 * XXX: good up to 64 PMCs
3748 	 */
3749 	for (i=1; (pmu_conf.pmc_desc[i].type & PFM_REG_END) == 0; i++) {
3750 		if ((pmu_conf.pmc_desc[i].type & PFM_REG_IMPL) == 0) continue;
3751 		ia64_set_pmc(i, PMC_DFL_VAL(i));
3752 		/*
3753 		 * When restoring the context, we must restore ALL pmcs, even the ones
3754 		 * that the task does not use, to avoid leaks and possible corruption
3755 		 * of the session because of configuration conflicts. So here, we
3756 		 * initialize the entire set used in the context switch restore routine.
3757 	 	 */
3758 		t->pmc[i] = PMC_DFL_VAL(i);
3759 		DBprintk(("pmc[%d]=0x%lx\n", i, t->pmc[i]));
3760 	}
3761 
3762 	/*
3763 	 * clear the PMDs (reset values to 0).
3764 	 * XXX: good up to 64 PMDs.
3765 	 */
3766 	for (i=0; (pmu_conf.pmd_desc[i].type & PFM_REG_END) == 0; i++) {
3767 		if ((pmu_conf.pmd_desc[i].type & PFM_REG_IMPL) == 0) continue;
3768 		ia64_set_pmd(i, 0UL);
3769 		t->pmd[i] = 0UL;
3770 	}
3771 
3772 	/*
3773 	 * On context switch restore, we must restore ALL pmcs and ALL pmds even
3774 	 * when they are not actively used by the task. In UP, the incoming process
3775 	 * may otherwise pick up leftover PMC, PMD state from the previous process.
3776 	 * As opposed to PMDs, stale PMCs can cause harm to the incoming
3777 	 * process because they may change what is being measured.
3778 	 * Therefore, we must systematically reinstall the entire
3779 	 * PMC state. In SMP, the same thing can happen on the
3780 	 * same CPU but also between 2 CPUs.
3781 	 *
3782 	 * The problem with PMD is information leaking especially
3783 	 * to user level when psr.sp=0
3784 	 *
3785 	 * There is unfortunately no easy way to avoid this problem
3786 	 * on either UP or SMP. This definitely slows down the
3787 	 * pfm_load_regs() function.
3788 	 */
3789 
3790 	 /*
3791 	  * We must include all the PMCs in this mask to make sure we don't
3792 	  * see any side effect of a stale state, such as opcode matching
3793 	  * or range restrictions, for instance.
3794 	  *
3795 	  * We never directly restore PMC0 so we do not include it in the mask.
3796 	  */
3797 	ctx->ctx_reload_pmcs[0] = pmu_conf.impl_pmcs[0] & ~0x1;
3798 	/*
3799 	 * We must include all the PMDs in this mask to avoid picking
3800 	 * up stale value and leak information, especially directly
3801 	 * at the user level when psr.sp=0
3802 	 */
3803 	ctx->ctx_reload_pmds[0] = pmu_conf.impl_pmds[0];
3804 
3805 	/*
3806 	 * Keep track of the pmds we want to sample
3807 	 * XXX: maybe we don't need to save/restore the DEAR/IEAR pmds
3808 	 * but we do need the BTB for sure. This is because of a hardware
3809 	 * buffer of 1 only for non-BTB pmds.
3810 	 *
3811 	 * We ignore the unimplemented pmds specified by the user
3812 	 */
3813 	ctx->ctx_used_pmds[0] = ctx->ctx_smpl_regs[0];
3814 	ctx->ctx_used_pmcs[0] = 1; /* always save/restore PMC[0] */
3815 
3816 	/*
3817 	 * useful in case of re-enable after disable
3818 	 */
3819 	ctx->ctx_used_ibrs[0] = 0UL;
3820 	ctx->ctx_used_dbrs[0] = 0UL;
3821 
3822 	ia64_srlz_d();
3823 }
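
/*
 * Worked example for the reload masks set up above (hypothetical PMU
 * layout): if impl_pmcs[0] == 0x1fff, i.e. PMC0-PMC12 implemented, then
 *
 *	ctx_reload_pmcs[0] = 0x1fff & ~0x1 = 0x1ffe
 *
 * so PMC1-PMC12 are reinstalled on context switch in, while PMC0
 * (freeze/overflow status) is always handled separately.
 */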
3824 
3825 /*
3826  * This function is called when a thread exits (from exit_thread()).
3827  * This is a simplified pfm_save_regs() that simply flushes the current
3828  * register state into the save area taking into account any pending
3829  * overflow. This time no notification is sent because the task is dying
3830  * anyway. The inline processing of overflows avoids losing some counts.
3831  * The PMU is frozen on exit from this call and is never to be re-enabled
3832  * again for this task.
3833  *
3834  */
3835 void
3836 pfm_flush_regs (struct task_struct *task)
3837 {
3838 	pfm_context_t *ctx;
3839 	u64 pmc0;
3840 	unsigned long mask2, val;
3841 	int i;
3842 
3843 	ctx = task->thread.pfm_context;
3844 
3845 	if (ctx == NULL) return;
3846 
3847 	/*
3848 	 * that's it if context already disabled
3849 	 */
3850 	if (ctx->ctx_flags.state == PFM_CTX_DISABLED) return;
3851 
3852 	/*
3853 	 * stop monitoring:
3854 	 * This is the only way to stop monitoring without destroying overflow
3855 	 * information in PMC[0].
3856 	 * This is the last instruction which can cause overflow when monitoring
3857 	 * in kernel.
3858 	 * By now, we could still have an overflow interrupt in-flight.
3859 	 */
3860 	if (ctx->ctx_fl_system) {
3861 
3862 		/* disable dcr pp */
3863 		ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
3864 
3865 		/* stop monitoring */
3866 		pfm_clear_psr_pp();
3867 		ia64_srlz_i();
3868 
3869 		PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
3870 		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
3871 		PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
3872 	} else  {
3873 
3874 		/* stop monitoring */
3875 		pfm_clear_psr_up();
3876 		ia64_srlz_i();
3877 
3878 		/* no more save/restore on ctxsw */
3879 		current->thread.flags &= ~IA64_THREAD_PM_VALID;
3880 	}
3881 
3882 	/*
3883 	 * Mark the PMU as not owned.
3884 	 * This will cause the interrupt handler to do nothing in case an overflow
3885 	 * interrupt was in-flight.
3886 	 * This also guarantees that pmc0 will contain the final state.
3887 	 * It virtually gives us full control over overflow processing from that point
3888 	 * on.
3889 	 * It must be an atomic operation.
3890 	 */
3891 	SET_PMU_OWNER(NULL);
3892 
3893 	/*
3894 	 * read current overflow status:
3895 	 *
3896 	 * we are guaranteed to read the final stable state
3897 	 */
3898 	ia64_srlz_d();
3899 	pmc0 = ia64_get_pmc(0); /* slow */
3900 
3901 	/*
3902 	 * freeze PMU:
3903 	 *
3904 	 * This destroys the overflow information. This is required to make sure
3905 	 * next process does not start with monitoring on if not requested
3906 	 */
3907 	pfm_freeze_pmu();
3908 
3909 	/*
3910 	 * We don't need to restore psr, because we are on our way out
3911 	 */
3912 
3913 	/*
3914 	 * This loop flushes the PMD into the PFM context.
3915 	 * It also processes overflow inline.
3916 	 *
3917 	 * IMPORTANT: No notification is sent at this point as the process is dying.
3918 	 * The implicit notification will come from a SIGCHLD or a return from a
3919 	 * waitpid().
3920 	 *
3921 	 */
3922 #ifdef CONFIG_SMP
3923 	if (GET_LAST_CPU(ctx) != smp_processor_id())
3924 		printk(KERN_DEBUG "perfmon: [%d] last_cpu=%d\n",
3925 		       task->pid, GET_LAST_CPU(ctx));
3926 #endif
3927 
3928 	/*
3929 	 * we save all the used pmds
3930 	 * we take care of overflows for pmds used as counters
3931 	 */
3932 	mask2 = ctx->ctx_used_pmds[0];
3933 	for (i = 0; mask2; i++, mask2>>=1) {
3934 
3935 		/* skip unused pmds */
3936 		if ((mask2 & 0x1) == 0) continue;
3937 
3938 		val = ia64_get_pmd(i);
3939 
3940 		if (PMD_IS_COUNTING(i)) {
3941 			DBprintk(("[%d] pmd[%d] soft_pmd=0x%lx hw_pmd=0x%lx\n",
3942 				task->pid,
3943 				i,
3944 				ctx->ctx_soft_pmds[i].val,
3945 				val & pmu_conf.ovfl_val));
3946 
3947 			/* collect latest results */
3948 			ctx->ctx_soft_pmds[i].val += val & pmu_conf.ovfl_val;
3949 
3950 			/*
3951 			 * now everything is in ctx_soft_pmds[] and we need
3952 			 * to clear the saved context from save_regs() such that
3953 			 * pfm_read_pmds() gets the correct value
3954 			 */
3955 			task->thread.pmd[i] = 0;
3956 
3957 			/*
3958 			 * take care of overflow inline
3959 			 */
3960 			if (pmc0 & (1UL << i)) {
3961 				ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val;
3962 				DBprintk(("[%d] pmd[%d] overflowed soft_pmd=0x%lx\n",
3963 					task->pid, i, ctx->ctx_soft_pmds[i].val));
3964 			}
3965 		} else {
3966 			DBprintk(("[%d] pmd[%d] hw_pmd=0x%lx\n", task->pid, i, val));
3967 			/*
3968 			 * not a counter, just save value as is
3969 			 */
3970 			task->thread.pmd[i] = val;
3971 		}
3972 	}
3973 	SET_LAST_CPU(ctx, -1);
3974 }
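
/*
 * Worked example of the overflow accounting above (assuming 47-bit hardware
 * counters, i.e. pmu_conf.ovfl_val == (1UL<<47)-1; the real width comes from
 * PAL): if the hardware PMD reads 0x10 and its overflow bit is set in pmc0,
 * then
 *
 *	ctx_soft_pmds[i].val += 0x10;                   (low bits from hardware)
 *	ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val;  (one full wrap = 2^47)
 *
 * which yields the value a full 64-bit counter would have reached.
 */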
3975 
3976 
3977 /*
3978  * task is the newly created task, pt_regs for new child
3979  */
3980 int
3981 pfm_inherit(struct task_struct *task, struct pt_regs *regs)
3982 {
3983 	pfm_context_t *ctx;
3984 	pfm_context_t *nctx;
3985 	struct thread_struct *thread;
3986 	unsigned long m;
3987 	int i;
3988 
3989 	/*
3990 	 * the new task was copied from parent and therefore points
3991 	 * to the parent's context at this point
3992 	 */
3993 	ctx    = task->thread.pfm_context;
3994 	thread = &task->thread;
3995 
3996 	/*
3997 	 * for secure sessions, make sure child cannot mess up
3998 	 * the monitoring session.
3999 	 */
4000 	if (ctx->ctx_fl_unsecure == 0) {
4001 		ia64_psr(regs)->sp = 1;
4002 	 	DBprintk(("enabling psr.sp for [%d]\n", task->pid));
4003 	} else {
4004 	 	DBprintk(("psr.sp=%d [%d]\n", ia64_psr(regs)->sp, task->pid));
4005 	}
4006 
4007 
4008 	/*
4009 	 * if there was a virtual mapping for the sampling buffer
4010 	 * the mapping is NOT inherited across fork() (see VM_DONTCOPY),
4011 	 * so we don't have to explicitly remove it here.
4012 	 *
4013 	 *
4014 	 * Part of the clearing of fields is also done in
4015 	 * copy_thread() because the fields are outside the
4016 	 * pfm_context structure and can affect tasks not
4017 	 * using perfmon.
4018 	 */
4019 
4020 	/* clear pending notification */
4021 	task->thread.pfm_ovfl_block_reset = 0;
4022 
4023 	/*
4024 	 * clear cpu pinning restriction for child
4025 	 */
4026 	if (ctx->ctx_fl_system) {
4027 		task->cpus_allowed = ctx->ctx_saved_cpus_allowed;
4028 		task->need_resched = 1;
4029 
4030 	 	DBprintk(("setting cpus_allowed for [%d] to 0x%lx from 0x%lx\n",
4031 			task->pid,
4032 			ctx->ctx_saved_cpus_allowed,
4033 			current->cpus_allowed));
4034 	}
4035 
4036 	/*
4037 	 * take care of the easiest case first
4038 	 */
4039 	if (CTX_INHERIT_MODE(ctx) == PFM_FL_INHERIT_NONE) {
4040 
4041 		DBprintk(("removing PFM context for [%d]\n", task->pid));
4042 
4043 		task->thread.pfm_context = NULL;
4044 
4045 		/*
4046 		 * we must clear psr.up because the new child does
4047 		 * not have a context and the PM_VALID flag is cleared
4048 		 * in copy_thread().
4049 		 *
4050 		 * we do not clear psr.pp because it is always
4051 		 * controlled by the system wide logic and we should
4052 		 * never be here when system wide is running anyway
4053 		 */
4054 	 	ia64_psr(regs)->up = 0;
4055 
4056 		/* copy_thread() clears IA64_THREAD_PM_VALID */
4057 		return 0;
4058 	}
4059 	nctx = pfm_context_alloc();
4060 	if (nctx == NULL) return -ENOMEM;
4061 
4062 	/* copy content */
4063 	*nctx = *ctx;
4064 
4065 	if (CTX_INHERIT_MODE(ctx) == PFM_FL_INHERIT_ONCE) {
4066 		nctx->ctx_fl_inherit = PFM_FL_INHERIT_NONE;
4067 		DBprintk(("downgrading to INHERIT_NONE for [%d]\n", task->pid));
4068 		/*
4069 		 * downgrade parent: once means only first child!
4070 		 */
4071 		ctx->ctx_fl_inherit = PFM_FL_INHERIT_NONE;
4072 	}
4073 	/*
4074 	 * task is not yet visible in the tasklist, so we do
4075 	 * not need to lock the newly created context.
4076 	 * However, we must grab the tasklist_lock to ensure
4077 	 * that the ctx_owner or ctx_notify_task do not disappear
4078 	 * while we increment their check counters.
4079 	 */
4080 	read_lock(&tasklist_lock);
4081 
4082 	if (nctx->ctx_notify_task)
4083 		atomic_inc(&nctx->ctx_notify_task->thread.pfm_notifiers_check);
4084 
4085 	if (nctx->ctx_owner)
4086 		atomic_inc(&nctx->ctx_owner->thread.pfm_owners_check);
4087 
4088 	read_unlock(&tasklist_lock);
4089 
4090 
4091 	LOCK_PFS();
4092 	pfm_sessions.pfs_task_sessions++;
4093 	UNLOCK_PFS();
4094 
4095 	/* initialize counters in new context */
4096 	m = nctx->ctx_used_pmds[0] >> PMU_FIRST_COUNTER;
4097 	for(i = PMU_FIRST_COUNTER ; m ; m>>=1, i++) {
4098 		if ((m & 0x1) && pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING) {
4099 			nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].lval & ~pmu_conf.ovfl_val;
4100 			thread->pmd[i]	      	   = nctx->ctx_soft_pmds[i].lval & pmu_conf.ovfl_val;
4101 		} else {
4102 			thread->pmd[i]	      	   = 0UL; /* reset to initial state */
4103 		}
4104 	}
4105 
4106 	nctx->ctx_fl_frozen    = 0;
4107 	nctx->ctx_ovfl_regs[0] = 0UL;
4108 	SET_LAST_CPU(nctx, -1);
4109 
4110 	/*
4111 	 * here nctx->ctx_psb == ctx->ctx_psb
4112 	 *
4113 	 * increment the reference count on the sampling
4114 	 * buffer, if any. Note that this is independent
4115 	 * of the virtual mapping. The latter is never
4116 	 * inherited while the former will be if the context
4117 	 * is set up to something other than PFM_FL_INHERIT_NONE
4118 	 */
4119 	if (nctx->ctx_psb) {
4120 		LOCK_PSB(nctx->ctx_psb);
4121 
4122 		nctx->ctx_psb->psb_refcnt++;
4123 
4124 	 	DBprintk(("updated smpl @ %p refcnt=%lu psb_flags=0x%x\n",
4125 			ctx->ctx_psb->psb_hdr,
4126 			ctx->ctx_psb->psb_refcnt,
4127 			ctx->ctx_psb->psb_flags));
4128 
4129 		UNLOCK_PSB(nctx->ctx_psb);
4130 
4131 		/*
4132 	 	 * remove any pointer to sampling buffer mapping
4133 	 	 */
4134 		nctx->ctx_smpl_vaddr = 0;
4135 	}
4136 
4137 	sema_init(&nctx->ctx_restart_sem, 0); /* reset this semaphore to locked */
4138 
4139 	/*
4140 	 * propagate kernel psr to the new context (used for the first ctxsw in)
4141 	 */
4142 	nctx->ctx_saved_psr = pfm_get_psr();
4143 
4144 	/*
4145 	 * force a full reload on ctxsw in
4146 	 */
4147 	nctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
4148 	SET_LAST_CPU(nctx, -1);
4149 
4150 	/*
4151 	 * initialize tasklet for signal notifications
4152 	 *
4153 	 * ALL signal-based notifications (or any notification using data structures
4154 	 * external to perfmon) MUST use tasklets to avoid lock contention
4155 	 * when a signal has to be sent from the overflow interrupt handler.
4156 	 */
4157 	tasklet_init(&nctx->ctx_tasklet, pfm_send_notification_signal, (unsigned long)nctx);
4158 
4159 	/* link with new task */
4160 	thread->pfm_context = nctx;
4161 
4162 	DBprintk(("nctx=%p for process [%d]\n", (void *)nctx, task->pid));
4163 
4164 	/*
4165 	 * the copy_thread routine automatically clears
4166 	 * IA64_THREAD_PM_VALID, so we need to re-enable it if it was used by the caller
4167 	 */
4168 	if (current->thread.flags & IA64_THREAD_PM_VALID) {
4169 		DBprintk(("setting PM_VALID for [%d]\n", task->pid));
4170 		thread->flags |= IA64_THREAD_PM_VALID;
4171 	}
4172 	return 0;
4173 }
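
/*
 * Summary of the inheritance modes handled above (clarifying note):
 *
 *	PFM_FL_INHERIT_NONE	the child gets no context and psr.up is cleared
 *	PFM_FL_INHERIT_ONCE	the child gets a copy of the context and the
 *				parent is downgraded to INHERIT_NONE, so only
 *				the first child inherits
 *	PFM_FL_INHERIT_ALL	every child gets a copy of the context
 */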
4174 
4175 /*
4176  *
4177  * We cannot touch any of the PMU registers at this point as we may
4178  * not be running on the same CPU the task was last run on.  Therefore
4179  * it is assumed that the PMU has been stopped appropriately in
4180  * pfm_flush_regs() called from exit_thread().
4181  *
4182  * The function is called in the context of the parent via a release_thread()
4183  * and wait4(). The task is not in the tasklist anymore.
4184  */
4185 void
4186 pfm_context_exit(struct task_struct *task)
4187 {
4188 	pfm_context_t *ctx = task->thread.pfm_context;
4189 
4190 	/*
4191 	 * check sampling buffer
4192 	 */
4193 	if (ctx->ctx_psb) {
4194 		pfm_smpl_buffer_desc_t *psb = ctx->ctx_psb;
4195 
4196 		LOCK_PSB(psb);
4197 
4198 		DBprintk(("sampling buffer from [%d] @%p size %ld refcnt=%lu psb_flags=0x%x\n",
4199 			task->pid,
4200 			psb->psb_hdr, psb->psb_size, psb->psb_refcnt, psb->psb_flags));
4201 
4202 		/*
4203 		 * in the case where we are the last user, we may be able to free
4204 		 * the buffer
4205 		 */
4206 		psb->psb_refcnt--;
4207 
4208 		if (psb->psb_refcnt == 0) {
4209 
4210 			/*
4211 			 * The flag is cleared in pfm_vm_close(), which gets
4212 			 * called from do_exit() via exit_mm().
4213 			 * By the time we come here, the task has no more mm context.
4214 			 *
4215 			 * We can only free the psb and buffer here after the vm area
4216 			 * describing the buffer has been removed. This normally happens
4217 			 * as part of do_exit() but the entire mm context is ONLY removed
4218 			 * once its reference count goes to zero. This is typically
4219 			 * the case except for multi-threaded (several tasks) processes.
4220 			 *
4221 			 * See pfm_vm_close() and pfm_cleanup_smpl_buf() for more details.
4222 			 */
4223 			if ((psb->psb_flags & PSB_HAS_VMA) == 0) {
4224 
4225 				DBprintk(("cleaning sampling buffer from [%d] @%p size %ld\n",
4226 					task->pid,
4227 					psb->psb_hdr, psb->psb_size));
4228 
4229 				/*
4230 				 * free the buffer and psb
4231 				 */
4232 				pfm_rvfree(psb->psb_hdr, psb->psb_size);
4233 				kfree(psb);
4234 				psb = NULL;
4235 			}
4236 		}
4237 		/* psb may have been deleted */
4238 		if (psb) UNLOCK_PSB(psb);
4239 	}
4240 
4241 	DBprintk(("cleaning [%d] pfm_context @%p notify_task=%p check=%d mm=%p\n",
4242 		task->pid, ctx,
4243 		ctx->ctx_notify_task,
4244 		atomic_read(&task->thread.pfm_notifiers_check), task->mm));
4245 
4246 	/*
4247 	 * To avoid having the notified task or owner task scan the entire process
4248 	 * list when they exit, we decrement notifiers_check and owners_check respectively.
4249 	 *
4250 	 * Of course, there is a race condition between decreasing the value and the
4251 	 * task exiting. The danger comes from the fact that, in both cases, we have a
4252 	 * direct pointer to a task structure thereby bypassing the tasklist.
4253 	 * We must make sure that, if we have task!= NULL, the target task is still
4254 	 * present and is identical to the initial task specified
4255 	 * during pfm_context_create(). It may already be detached from the tasklist but
4256 	 * that's okay. Note that it is okay if we miss the deadline and the task scans
4257 	 * the list for nothing; it will affect performance but not correctness.
4258 	 * The correctness is ensured by using the ctx_lock which prevents the
4259 	 * notify_task from changing the fields in our context.
4260 	 * Once holding this lock, if we see task != NULL, then it will stay like
4261 	 * that until we release the lock. If it is NULL already then we came too late.
4262 	 */
4263 	LOCK_CTX(ctx);
4264 
4265 	if (ctx->ctx_notify_task != NULL) {
4266 		DBprintk(("[%d], [%d] atomic_sub on [%d] notifiers=%u\n", current->pid,
4267 			task->pid,
4268 			ctx->ctx_notify_task->pid,
4269 			atomic_read(&ctx->ctx_notify_task->thread.pfm_notifiers_check)));
4270 
4271 		atomic_dec(&ctx->ctx_notify_task->thread.pfm_notifiers_check);
4272 	}
4273 
4274 	if (ctx->ctx_owner != NULL) {
4275 		DBprintk(("[%d], [%d] atomic_sub on [%d] owners=%u\n",
4276 			 current->pid,
4277 			 task->pid,
4278 			 ctx->ctx_owner->pid,
4279 			 atomic_read(&ctx->ctx_owner->thread.pfm_owners_check)));
4280 
4281 		atomic_dec(&ctx->ctx_owner->thread.pfm_owners_check);
4282 	}
4283 
4284 	UNLOCK_CTX(ctx);
4285 
4286 	pfm_unreserve_session(task, ctx->ctx_fl_system, 1UL << ctx->ctx_cpu);
4287 
4288 	if (ctx->ctx_fl_system) {
4289 		/*
4290 	 	 * remove any CPU pinning
4291 	 	 */
4292 		task->cpus_allowed = ctx->ctx_saved_cpus_allowed;
4293 		task->need_resched = 1;
4294 	}
4295 
4296 	pfm_context_free(ctx);
4297 	/*
4298 	 *  clean pfm state in thread structure,
4299 	 */
4300 	task->thread.pfm_context          = NULL;
4301 	task->thread.pfm_ovfl_block_reset = 0;
4302 
4303 	/* pfm_notifiers is cleaned in pfm_cleanup_notifiers() */
4304 }
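
/*
 * Note on the sampling buffer lifetime handled above: the buffer descriptor
 * (psb) is reference counted and is freed here only when psb_refcnt drops to
 * zero AND the vma describing the user mapping is already gone (PSB_HAS_VMA
 * cleared by pfm_vm_close()). Otherwise the final release is handled through
 * pfm_vm_close()/pfm_cleanup_smpl_buf(); see those functions for details.
 */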
4305 
4306 /*
4307  * function invoked from release_thread when pfm_smpl_buf_list is not NULL
4308  */
4309 int
4310 pfm_cleanup_smpl_buf(struct task_struct *task)
4311 {
4312 	pfm_smpl_buffer_desc_t *tmp, *psb = task->thread.pfm_smpl_buf_list;
4313 
4314 	if (psb == NULL) {
4315 		printk(KERN_DEBUG "perfmon: psb is null in [%d]\n", current->pid);
4316 		return -1;
4317 	}
4318 	/*
4319 	 * Walk through the list and free the sampling buffer and psb
4320 	 */
4321 	while (psb) {
4322 		DBprintk(("[%d] freeing smpl @%p size %ld\n", current->pid, psb->psb_hdr, psb->psb_size));
4323 
4324 		pfm_rvfree(psb->psb_hdr, psb->psb_size);
4325 		tmp = psb->psb_next;
4326 		kfree(psb);
4327 		psb = tmp;
4328 	}
4329 
4330 	/* just in case */
4331 	task->thread.pfm_smpl_buf_list = NULL;
4332 
4333 	return 0;
4334 }
4335 
4336 /*
4337  * function invoked from release_thread to make sure that the ctx_owner field does not
4338  * point to a task that no longer exists.
4339  */
4340 void
4341 pfm_cleanup_owners(struct task_struct *task)
4342 {
4343 	struct task_struct *p;
4344 	pfm_context_t *ctx;
4345 
4346 	DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid));
4347 
4348 	read_lock(&tasklist_lock);
4349 
4350 	for_each_task(p) {
4351 		/*
4352 		 * It is safe to do the 2-step test here, because thread.ctx
4353 		 * is cleaned up only in release_thread() and at that point
4354 		 * the task has been detached from the tasklist, an operation
4355 		 * which takes the write_lock() on the tasklist_lock,
4356 		 * so it cannot run concurrently with this loop. So we have the
4357 		 * guarantee that if we find p and it has a perfmon ctx then
4358 		 * it is going to stay like this for the entire execution of this
4359 		 * loop.
4360 		 */
4361 		ctx = p->thread.pfm_context;
4362 
4363 		//DBprintk(("[%d] scanning task [%d] ctx=%p\n", task->pid, p->pid, ctx));
4364 
4365 		if (ctx && ctx->ctx_owner == task) {
4366 			DBprintk(("trying for owner [%d] in [%d]\n", task->pid, p->pid));
4367 			/*
4368 			 * the spinlock is required to take care of a race condition
4369 			 * with the send_sig_info() call. We must make sure that
4370 			 * either the send_sig_info() completes using a valid task,
4371 			 * or the notify_task is cleared before the send_sig_info()
4372 			 * can pick up a stale value. Note that by the time this
4373 			 * function is executed the 'task' is already detached from the
4374 			 * tasklist. The problem is that the notifiers have a direct
4375 			 * pointer to it. It is okay to send a signal to a task in this
4376 			 * stage, it simply will have no effect. But it is better than sending
4377 			 * to a completely destroyed task or worse to a new task using the same
4378 			 * task_struct address.
4379 			 */
4380 			LOCK_CTX(ctx);
4381 
4382 			ctx->ctx_owner = NULL;
4383 
4384 			UNLOCK_CTX(ctx);
4385 
4386 			DBprintk(("done for owner [%d] in [%d]\n", task->pid, p->pid));
4387 		}
4388 	}
4389 	read_unlock(&tasklist_lock);
4390 
4391 	atomic_set(&task->thread.pfm_owners_check, 0);
4392 }
4393 
4394 
4395 /*
4396  * function called from release_thread to make sure that the ctx_notify_task is not pointing
4397  * to a task that no longer exists.
4398  */
4399 void
4400 pfm_cleanup_notifiers(struct task_struct *task)
4401 {
4402 	struct task_struct *p;
4403 	pfm_context_t *ctx;
4404 
4405 	DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid));
4406 
4407 	read_lock(&tasklist_lock);
4408 
4409 	for_each_task(p) {
4410 		/*
4411 		 * It is safe to do the 2-step test here, because thread.ctx
4412 		 * is cleaned up only in release_thread() and at that point
4413 		 * the task has been detached from the tasklist, an operation
4414 		 * which takes the write_lock() on the tasklist_lock,
4415 		 * so it cannot run concurrently with this loop. So we have the
4416 		 * guarantee that if we find p and it has a perfmon ctx then
4417 		 * it is going to stay like this for the entire execution of this
4418 		 * loop.
4419 		 */
4420 		ctx = p->thread.pfm_context;
4421 
4422 		//DBprintk(("[%d] scanning task [%d] ctx=%p\n", task->pid, p->pid, ctx));
4423 
4424 		if (ctx && ctx->ctx_notify_task == task) {
4425 			DBprintk(("trying for notifier [%d] in [%d]\n", task->pid, p->pid));
4426 			/*
4427 			 * the spinlock is required to take care of a race condition
4428 			 * with the send_sig_info() call. We must make sure that
4429 			 * either the send_sig_info() completes using a valid task,
4430 			 * or the notify_task is cleared before the send_sig_info()
4431 			 * can pick up a stale value. Note that by the time this
4432 			 * function is executed the 'task' is already detached from the
4433 			 * tasklist. The problem is that the notifiers have a direct
4434 			 * pointer to it. It is okay to send a signal to a task in this
4435 			 * stage, it simply will have no effect. But it is better than sending
4436 			 * to a completely destroyed task or worse to a new task using the same
4437 			 * task_struct address.
4438 			 */
4439 			LOCK_CTX(ctx);
4440 
4441 			ctx->ctx_notify_task = NULL;
4442 
4443 			UNLOCK_CTX(ctx);
4444 
4445 			DBprintk(("done for notifier [%d] in [%d]\n", task->pid, p->pid));
4446 		}
4447 	}
4448 	read_unlock(&tasklist_lock);
4449 
4450 	atomic_set(&task->thread.pfm_notifiers_check, 0);
4451 }
4452 
4453 static struct irqaction perfmon_irqaction = {
4454 	.handler = pfm_interrupt_handler,
4455 	.flags   = SA_INTERRUPT,
4456 	.name    = "perfmon"
4457 };
4458 
4459 int
4460 pfm_install_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl)
4461 {
4462 	int ret;
4463 
4464 	/* some sanity checks */
4465 	if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
4466 
4467 	/* do the easy test first */
4468 	if (pfm_alternate_intr_handler) return -EBUSY;
4469 
4470 	/* reserve our session */
4471 	ret = pfm_reserve_session(NULL, 1, cpu_online_map);
4472 	if (ret) return ret;
4473 
4474 	if (pfm_alternate_intr_handler) {
4475 		printk(KERN_DEBUG "perfmon: install_alternate, intr_handler not NULL "
4476 		       "after reserve\n");
4477 		return -EINVAL;
4478 	}
4479 
4480 	pfm_alternate_intr_handler = hdl;
4481 
4482 	return 0;
4483 }
4484 
4485 int
4486 pfm_remove_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl)
4487 {
4488 	if (hdl == NULL) return -EINVAL;
4489 
4490 	/* cannot remove someone else's handler! */
4491 	if (pfm_alternate_intr_handler != hdl) return -EINVAL;
4492 
4493 	pfm_alternate_intr_handler = NULL;
4494 
4495 	/*
4496 	 * XXX: assume cpu_online_map has not changed since reservation
4497 	 */
4498 	pfm_unreserve_session(NULL, 1, cpu_online_map);
4499 
4500 	return 0;
4501 }
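
/*
 * Illustrative sketch (not part of the original code) of how an in-kernel
 * system-wide subsystem could claim the PMU through the two entry points
 * above. The handler name and initializer shown here are assumptions; only
 * the install/remove calls come from this file:
 *
 *	static pfm_intr_handler_desc_t my_desc = {
 *		.handler = my_pmu_interrupt_handler,
 *	};
 *
 *	err = pfm_install_alternate_syswide_subsystem(&my_desc);
 *	if (err) return err;	(e.g. -EBUSY if the PMU is already in use)
 *	...
 *	pfm_remove_alternate_syswide_subsystem(&my_desc);
 */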
4502 
4503 static struct file_operations pfm_proc_fops = {
4504 	.open		= pfm_proc_open,
4505 	.read		= seq_read,
4506 	.llseek		= seq_lseek,
4507 	.release	= seq_release,
4508 };
4509 
4510 /*
4511  * perfmon initialization routine, called from the initcall() table
4512  */
4513 int __init
4514 pfm_init(void)
4515 {
4516 	unsigned int n, n_counters, i;
4517 
4518 	pmu_conf.disabled = 1;
4519 
4520 	printk(KERN_INFO "perfmon: version %u.%u IRQ %u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN,
4521 	       IA64_PERFMON_VECTOR);
4522 
4523 	/*
4524 	 * compute the number of implemented PMD/PMC from the
4525 	 * description tables
4526 	 */
4527 	n = 0;
4528 	for (i=0; PMC_IS_LAST(i) == 0;  i++) {
4529 		if (PMC_IS_IMPL(i) == 0) continue;
4530 		pmu_conf.impl_pmcs[i>>6] |= 1UL << (i&63);
4531 		n++;
4532 	}
4533 	pmu_conf.num_pmcs = n;
4534 
4535 	n = 0; n_counters = 0;
4536 	for (i=0; PMD_IS_LAST(i) == 0;  i++) {
4537 		if (PMD_IS_IMPL(i) == 0) continue;
4538 		pmu_conf.impl_pmds[i>>6] |= 1UL << (i&63);
4539 		n++;
4540 		if (PMD_IS_COUNTING(i)) n_counters++;
4541 	}
4542 	pmu_conf.num_pmds      = n;
4543 	pmu_conf.num_counters  = n_counters;
4544 
4545 	printk(KERN_INFO "perfmon: %u PMCs, %u PMDs, %u counters (%lu bits)\n",
4546 	       pmu_conf.num_pmcs,
4547 	       pmu_conf.num_pmds,
4548 	       pmu_conf.num_counters,
4549 	       ffz(pmu_conf.ovfl_val));
4550 
4551 	/* sanity check */
4552 	if (pmu_conf.num_pmds >= IA64_NUM_PMD_REGS || pmu_conf.num_pmcs >= IA64_NUM_PMC_REGS) {
4553 		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
4554 		return -1;
4555 	}
4556 
4557 	/*
4558 	 * create the /proc/perfmon entry (for now mostly for debugging purposes)
4559 	 */
4560  	perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
4561 	if (perfmon_dir == NULL) {
4562 		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
4563 		return -1;
4564 	}
4565   	/*
4566  	 * install customized file operations for /proc/perfmon entry
4567  	 */
4568  	perfmon_dir->proc_fops = &pfm_proc_fops;
4569 
4570 	/*
4571 	 * create /proc/sys/kernel/perfmon
4572 	 */
4573 	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
4574 
4575 	/*
4576 	 * initialize all our spinlocks
4577 	 */
4578 	spin_lock_init(&pfm_sessions.pfs_lock);
4579 
4580 	/* we are all set */
4581 	pmu_conf.disabled = 0;
4582 
4583 	return 0;
4584 }
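
/*
 * Example of the bitmap arithmetic used in pfm_init() above (the register
 * number is hypothetical): for an implemented register i == 70,
 *
 *	impl_pmds[70 >> 6] |= 1UL << (70 & 63);    i.e. impl_pmds[1] |= 1UL << 6;
 *
 * so each 64-bit word of the bitmap covers 64 consecutive registers.
 */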
4585 
4586 __initcall(pfm_init);
4587 
4588 void
4589 pfm_init_percpu(void)
4590 {
4591 	int i;
4592 
4593 	if (smp_processor_id() == 0)
4594 		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
4595 
4596 	ia64_set_pmv(IA64_PERFMON_VECTOR);
4597 	ia64_srlz_d();
4598 
4599 	/*
4600 	 * we first initialize the PMU to a stable state.
4601 	 * the values may have been changed from their power-up
4602 	 * values by software executed before the kernel took over.
4603 	 *
4604 	 * At this point, pmu_conf has not yet been initialized
4605 	 *
4606 	 * On McKinley, this code is ineffective until PMC4 is initialized.
4607 	 */
4608 	for (i=1; PMC_IS_LAST(i) == 0;  i++) {
4609 		if (PMC_IS_IMPL(i) == 0) continue;
4610 		ia64_set_pmc(i, PMC_DFL_VAL(i));
4611 	}
4612 
4613 	for (i=0; PMD_IS_LAST(i) == 0; i++) {
4614 		if (PMD_IS_IMPL(i) == 0) continue;
4615 		ia64_set_pmd(i, 0UL);
4616 	}
4617 	pfm_freeze_pmu();
4618 }
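
/*
 * Note: the PMC loop in pfm_init_percpu() starts at 1 because PMC0 is the
 * freeze/overflow status register; it is put into a known (frozen) state by
 * the pfm_freeze_pmu() call at the end of the function rather than by the
 * generic reset loop.
 */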
4619 
4620 #else /* !CONFIG_PERFMON */
4621 
4622 asmlinkage long
4623 sys_perfmonctl (int pid, int cmd, void *req, int count, long arg5, long arg6,
4624 		long arg7, long arg8, long stack)
4625 {
4626 	return -ENOSYS;
4627 }
4628 
4629 #endif /* !CONFIG_PERFMON */
4630