1 /******************************************************************************
2  * arch/ia64/xen/time.c
3  *
4  * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
5  *                    VA Linux Systems Japan K.K.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  */
22 
23 #include <linux/delay.h>
24 #include <linux/kernel_stat.h>
25 #include <linux/posix-timers.h>
26 #include <linux/irq.h>
27 #include <linux/clocksource.h>
28 
29 #include <asm/timex.h>
30 
31 #include <asm/xen/hypervisor.h>
32 
33 #include <xen/interface/vcpu.h>
34 
35 #include "../kernel/fsyscall_gtod_data.h"
36 
/*
 * Per-CPU runstate area.  Its address is handed to the hypervisor with
 * VCPUOP_register_runstate_memory_area in
 * xen_init_missing_ticks_accounting() and read back through
 * get_runstate_snapshot().
 */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
/*
 * Per-CPU baseline of RUNSTATE_runnable + RUNSTATE_offline time; seeded at
 * init and advanced by NS_PER_TICK for every stolen tick accounted.
 */
static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
/*
 * Per-CPU baseline of RUNSTATE_blocked time; seeded at init and advanced by
 * NS_PER_TICK for every blocked tick accounted.
 */
static DEFINE_PER_CPU(unsigned long, xen_blocked_time);
40 
41 /* taken from i386/kernel/time-xen.c */
xen_init_missing_ticks_accounting(int cpu)42 static void xen_init_missing_ticks_accounting(int cpu)
43 {
44 	struct vcpu_register_runstate_memory_area area;
45 	struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
46 	int rc;
47 
48 	memset(runstate, 0, sizeof(*runstate));
49 
50 	area.addr.v = runstate;
51 	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
52 				&area);
53 	WARN_ON(rc && rc != -ENOSYS);
54 
55 	per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
56 	per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
57 					    + runstate->time[RUNSTATE_offline];
58 }
59 
60 /*
61  * Runstate accounting
62  */
63 /* stolen from arch/x86/xen/time.c */
get_runstate_snapshot(struct vcpu_runstate_info * res)64 static void get_runstate_snapshot(struct vcpu_runstate_info *res)
65 {
66 	u64 state_time;
67 	struct vcpu_runstate_info *state;
68 
69 	BUG_ON(preemptible());
70 
71 	state = &__get_cpu_var(xen_runstate);
72 
73 	/*
74 	 * The runstate info is always updated by the hypervisor on
75 	 * the current CPU, so there's no need to use anything
76 	 * stronger than a compiler barrier when fetching it.
77 	 */
78 	do {
79 		state_time = state->state_entry_time;
80 		rmb();
81 		*res = *state;
82 		rmb();
83 	} while (state->state_entry_time != state_time);
84 }
85 
86 #define NS_PER_TICK (1000000000LL/HZ)
87 
88 static unsigned long
consider_steal_time(unsigned long new_itm)89 consider_steal_time(unsigned long new_itm)
90 {
91 	unsigned long stolen, blocked;
92 	unsigned long delta_itm = 0, stolentick = 0;
93 	int cpu = smp_processor_id();
94 	struct vcpu_runstate_info runstate;
95 	struct task_struct *p = current;
96 
97 	get_runstate_snapshot(&runstate);
98 
99 	/*
100 	 * Check for vcpu migration effect
101 	 * In this case, itc value is reversed.
102 	 * This causes huge stolen value.
103 	 * This function just checks and reject this effect.
104 	 */
105 	if (!time_after_eq(runstate.time[RUNSTATE_blocked],
106 			   per_cpu(xen_blocked_time, cpu)))
107 		blocked = 0;
108 
109 	if (!time_after_eq(runstate.time[RUNSTATE_runnable] +
110 			   runstate.time[RUNSTATE_offline],
111 			   per_cpu(xen_stolen_time, cpu)))
112 		stolen = 0;
113 
114 	if (!time_after(delta_itm + new_itm, ia64_get_itc()))
115 		stolentick = ia64_get_itc() - new_itm;
116 
117 	do_div(stolentick, NS_PER_TICK);
118 	stolentick++;
119 
120 	do_div(stolen, NS_PER_TICK);
121 
122 	if (stolen > stolentick)
123 		stolen = stolentick;
124 
125 	stolentick -= stolen;
126 	do_div(blocked, NS_PER_TICK);
127 
128 	if (blocked > stolentick)
129 		blocked = stolentick;
130 
131 	if (stolen > 0 || blocked > 0) {
132 		account_steal_ticks(stolen);
133 		account_idle_ticks(blocked);
134 		run_local_timers();
135 
136 		rcu_check_callbacks(cpu, user_mode(get_irq_regs()));
137 
138 		scheduler_tick();
139 		run_posix_cpu_timers(p);
140 		delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
141 
142 		if (cpu == time_keeper_id)
143 			xtime_update(stolen + blocked);
144 
145 		local_cpu_data->itm_next = delta_itm + new_itm;
146 
147 		per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
148 		per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
149 	}
150 	return delta_itm;
151 }
152 
/*
 * Run steal-time accounting and push the pending timer match forward by
 * whatever was accounted.
 *
 * @new_itm: in/out; the timer match value, advanced by the accounted delta.
 *
 * Returns 1 when something was accounted and the updated *@new_itm lies
 * after the current ITC value, 0 otherwise.
 */
static int xen_do_steal_accounting(unsigned long *new_itm)
{
	unsigned long advance = consider_steal_time(*new_itm);
	int in_future;

	*new_itm += advance;

	in_future = time_after(*new_itm, ia64_get_itc());
	return (in_future && advance) ? 1 : 0;
}
163 
xen_itc_jitter_data_reset(void)164 static void xen_itc_jitter_data_reset(void)
165 {
166 	u64 lcycle, ret;
167 
168 	do {
169 		lcycle = itc_jitter_data.itc_lastcycle;
170 		ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, 0);
171 	} while (unlikely(ret != lcycle));
172 }
173 
174 /* based on xen_sched_clock() in arch/x86/xen/time.c. */
/*
 * This relies on HAVE_UNSTABLE_SCHED_CLOCK. If that option cannot be
 * enabled, similar logic has to be implemented here instead.
 */
179 /*
180  * Xen sched_clock implementation.  Returns the number of unstolen
181  * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
182  * states.
183  */
xen_sched_clock(void)184 static unsigned long long xen_sched_clock(void)
185 {
186 	struct vcpu_runstate_info runstate;
187 
188 	unsigned long long now;
189 	unsigned long long offset;
190 	unsigned long long ret;
191 
192 	/*
193 	 * Ideally sched_clock should be called on a per-cpu basis
194 	 * anyway, so preempt should already be disabled, but that's
195 	 * not current practice at the moment.
196 	 */
197 	preempt_disable();
198 
199 	/*
200 	 * both ia64_native_sched_clock() and xen's runstate are
201 	 * based on mAR.ITC. So difference of them makes sense.
202 	 */
203 	now = ia64_native_sched_clock();
204 
205 	get_runstate_snapshot(&runstate);
206 
207 	WARN_ON(runstate.state != RUNSTATE_running);
208 
209 	offset = 0;
210 	if (now > runstate.state_entry_time)
211 		offset = now - runstate.state_entry_time;
212 	ret = runstate.time[RUNSTATE_blocked] +
213 		runstate.time[RUNSTATE_running] +
214 		offset;
215 
216 	preempt_enable();
217 
218 	return ret;
219 }
220 
/*
 * Xen implementations of the pv_time_ops hooks, all defined in this file.
 * NOTE(review): the table is __initdata, so it is presumably copied into
 * the live ops structure during boot rather than referenced later - confirm
 * against the code that consumes it.
 */
struct pv_time_ops xen_time_ops __initdata = {
	.init_missing_ticks_accounting	= xen_init_missing_ticks_accounting,
	.do_steal_accounting		= xen_do_steal_accounting,
	.clocksource_resume		= xen_itc_jitter_data_reset,
	.sched_clock			= xen_sched_clock,
};
227 
228 /* Called after suspend, to resume time.  */
/*
 * Restart the local tick on the calling CPU by triggering one tick and
 * touching the soft-lockup watchdog.
 */
static void xen_local_tick_resume(void)
{
	/* Just trigger a tick.  */
	ia64_cpu_local_tick();
	/*
	 * NOTE(review): presumably so the time spent suspended is not
	 * reported as a soft lockup - confirm.
	 */
	touch_softlockup_watchdog();
}
235 
/*
 * Resume timekeeping on the calling CPU: restart the local tick, then
 * re-register the runstate area and reset the accounting baselines for
 * every online CPU.
 */
void
xen_timer_resume(void)
{
	unsigned int cpu;

	xen_local_tick_resume();

	/* Done from this CPU on behalf of each online CPU. */
	for_each_online_cpu(cpu)
		xen_init_missing_ticks_accounting(cpu);
}
246 
/*
 * Cross-CPU callback used by xen_timer_resume_on_aps(): restarts the local
 * tick and re-initialises steal-time accounting on the CPU it runs on.
 *
 * @unused: callback argument, ignored.
 */
static void ia64_cpu_local_tick_fn(void *unused)
{
	xen_local_tick_resume();
	xen_init_missing_ticks_accounting(smp_processor_id());
}
252 
/*
 * Resume timekeeping on the other CPUs by running
 * ia64_cpu_local_tick_fn() through smp_call_function().
 */
void
xen_timer_resume_on_aps(void)
{
	/*
	 * NOTE(review): the final argument (1) is presumably the "wait for
	 * completion" flag - confirm against this kernel's smp_call_function().
	 */
	smp_call_function(&ia64_cpu_local_tick_fn, NULL, 1);
}
258