/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i is the time period over which the differential is taken. So d/dt_{-i}
 * is the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x; so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonically increasing property can be applied to mitigate the wrap-
 *   around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */
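
/*
 * A minimal sketch of the same bookkeeping in plain C (SHIFT, NR, toy_inc()
 * and the arrays are invented for illustration; they are not part of this
 * API):
 *
 *   #define SHIFT	4			// period = 2^4 = 16 events
 *   enum { NR = 16 };				// number of tracked objects
 *   unsigned long t;				// global event clock
 *   unsigned long x[NR], c_j[NR];		// per-object events + period stamp
 *
 *   void toy_inc(int j)
 *   {
 *       unsigned long c = t >> (SHIFT - 1);	// current half-period count
 *       unsigned long missed = c - c_j[j];
 *
 *       x[j] = missed < BITS_PER_LONG ? x[j] >> missed : 0;	// lazy decay
 *       c_j[j] = c;
 *       x[j]++, t++;
 *   }
 *
 * after which p_{j} ~= x[j] / (period/2 + t % (period/2)), the fraction that
 * prop_fraction_*() below reports.
 */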

#include <linux/proportions.h>
#include <linux/rcupdate.h>

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
	int err;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	pd->index = 0;
	pd->pg[0].shift = shift;
	mutex_init(&pd->mutex);
	err = percpu_counter_init(&pd->pg[0].events, 0);
	if (err)
		goto out;

	err = percpu_counter_init(&pd->pg[1].events, 0);
	if (err)
		percpu_counter_destroy(&pd->pg[0].events);

out:
	return err;
}
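
/*
 * Typical usage (a sketch; example_prop and my_subsys_init() are hypothetical
 * names, not taken from an existing caller):
 *
 *   static struct prop_descriptor example_prop;
 *
 *   static int __init my_subsys_init(void)
 *   {
 *       return prop_descriptor_init(&example_prop, 10);  // 2^10 events/period
 *   }
 *
 * Note that shift values above PROP_MAX_SHIFT are silently clamped.
 */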

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
	int index;
	int offset;
	u64 events;
	unsigned long flags;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	mutex_lock(&pd->mutex);

	index = pd->index ^ 1;
	offset = pd->pg[pd->index].shift - shift;
	if (!offset)
		goto out;

	pd->pg[index].shift = shift;

	local_irq_save(flags);
	events = percpu_counter_sum(&pd->pg[pd->index].events);
	if (offset < 0)
		events <<= -offset;
	else
		events >>= offset;
	percpu_counter_set(&pd->pg[index].events, events);

	/*
	 * ensure the new pg is fully written before the switch
	 */
	smp_wmb();
	pd->index = index;
	local_irq_restore(flags);

	synchronize_rcu();

out:
	mutex_unlock(&pd->mutex);
}
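
/*
 * For example (illustrative numbers only): changing the shift from 10 to 8
 * gives offset = 2, so an events count of 0x5a3 is copied into the spare pg
 * as 0x5a3 >> 2 = 0x168, keeping the counter at the same relative position
 * within the (now four times smaller) period before the index flip makes
 * the new copy visible to readers.
 */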

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
__acquires(RCU)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the wmb from prop_change_shift()
	 */
	smp_rmb();
	return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
__releases(RCU)
{
	rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int offset = *pl_shift - new_shift;

	if (!offset)
		return;

	if (offset < 0)
		*pl_period <<= -offset;
	else
		*pl_period >>= offset;

	*pl_shift = new_shift;
}

/*
 * PERCPU
 */

#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	return percpu_counter_init(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

	/*
	 * For each missed period, we halve the local counter; effectively:
	 *   pl->events >>= (global_period - pl->period) / period;
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (period < BITS_PER_LONG) {
		s64 val = percpu_counter_read(&pl->events);

		if (val < (nr_cpu_ids * PROP_BATCH))
			val = percpu_counter_sum(&pl->events);

		__percpu_counter_add(&pl->events, -val + (val >> period),
					PROP_BATCH);
	} else
		percpu_counter_set(&pl->events, 0);

	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}
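
/*
 * Worked example (illustrative numbers only): with pg->shift = 4 a period is
 * 1 << 3 = 8 events. If pl->period trails global_period by 24 events, then
 * period = 24 >> 3 = 3 missed periods and the local count decays by 2^3,
 * e.g. a counter of 40 is brought down to 40 >> 3 = 5.
 */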

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);
	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Identical to __prop_inc_percpu, except that it limits this pl's fraction to
 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
			   struct prop_local_percpu *pl, long frac)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);

	if (unlikely(frac != PROP_FRAC_BASE)) {
		unsigned long period_2 = 1UL << (pg->shift - 1);
		unsigned long counter_mask = period_2 - 1;
		unsigned long global_count;
		long numerator, denominator;

		numerator = percpu_counter_read_positive(&pl->events);
		global_count = percpu_counter_read(&pg->events);
		denominator = period_2 + (global_count & counter_mask);

		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
			goto out_put;
	}

	percpu_counter_add(&pl->events, 1);
	percpu_counter_add(&pg->events, 1);

out_put:
	prop_put_global(pd, pg);
}
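
/*
 * In other words (a sketch of the math only): with frac = PROP_FRAC_BASE/4
 * the increment is skipped whenever numerator/denominator already exceeds
 * one quarter, so this pl's reported proportion saturates at roughly 25%.
 */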

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
		struct prop_local_percpu *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_percpu(pg, pl);
	*numerator = percpu_counter_read_positive(&pl->events);

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
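
/*
 * A caller typically scales some budget by the returned fraction; a sketch
 * (example_prop, my_local, budget and my_share are made up for illustration):
 *
 *   long num, den;
 *
 *   prop_fraction_percpu(&example_prop, &my_local, &num, &den);
 *   my_share = budget * num / den;		// i.e. budget * p_{j}
 */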

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	pl->events = 0;
	return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we halve the local counter.
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (likely(period < BITS_PER_LONG))
		pl->events >>= period;
	else
		pl->events = 0;
	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_single(pg, pl);
	pl->events++;
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
		struct prop_local_single *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_single(pg, pl);
	*numerator = pl->events;

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}