#ifndef __NET_PKT_SCHED_H
#define __NET_PKT_SCHED_H

#define PSCHED_GETTIMEOFDAY	1
#define PSCHED_JIFFIES 		2
#define PSCHED_CPU 		3

#define PSCHED_CLOCK_SOURCE	PSCHED_JIFFIES

#include <linux/config.h>
#include <linux/types.h>
#include <linux/pkt_sched.h>
#include <net/pkt_cls.h>

#ifdef CONFIG_X86_TSC
#include <asm/msr.h>
#endif

struct rtattr;
struct Qdisc;

struct qdisc_walker
{
	int	stop;
	int	skip;
	int	count;
	int	(*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
};
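
/* Editorial sketch (not from the original source): a qdisc's ->walk()
 * method typically drives this structure as below, skipping the first
 * arg->skip classes and stopping when the callback returns a negative
 * value.  "my_sched_data" and "my_class" are hypothetical names.
 *
 *	static void example_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 *	{
 *		struct my_sched_data *q = (struct my_sched_data *)sch->data;
 *		struct my_class *cl;
 *
 *		if (arg->stop)
 *			return;
 *		for (cl = q->classes; cl; cl = cl->next) {
 *			if (arg->count < arg->skip) {
 *				arg->count++;
 *				continue;
 *			}
 *			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
 *				arg->stop = 1;
 *				return;
 *			}
 *			arg->count++;
 *		}
 *	}
 */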

struct Qdisc_class_ops
{
	/* Child qdisc manipulation */
	int			(*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **);
	struct Qdisc *		(*leaf)(struct Qdisc *, unsigned long cl);

	/* Class manipulation routines */
	unsigned long		(*get)(struct Qdisc *, u32 classid);
	void			(*put)(struct Qdisc *, unsigned long);
	int			(*change)(struct Qdisc *, u32, u32, struct rtattr **, unsigned long *);
	int			(*delete)(struct Qdisc *, unsigned long);
	void			(*walk)(struct Qdisc *, struct qdisc_walker * arg);

	/* Filter manipulation */
	struct tcf_proto **	(*tcf_chain)(struct Qdisc *, unsigned long);
	unsigned long		(*bind_tcf)(struct Qdisc *, unsigned long, u32 classid);
	void			(*unbind_tcf)(struct Qdisc *, unsigned long);

	/* rtnetlink specific */
	int			(*dump)(struct Qdisc *, unsigned long, struct sk_buff *skb, struct tcmsg*);
};

struct Qdisc_ops
{
	struct Qdisc_ops	*next;
	struct Qdisc_class_ops	*cl_ops;
	char			id[IFNAMSIZ];
	int			priv_size;

	int 			(*enqueue)(struct sk_buff *, struct Qdisc *);
	struct sk_buff *	(*dequeue)(struct Qdisc *);
	int 			(*requeue)(struct sk_buff *, struct Qdisc *);
	unsigned int		(*drop)(struct Qdisc *);

	int			(*init)(struct Qdisc *, struct rtattr *arg);
	void			(*reset)(struct Qdisc *);
	void			(*destroy)(struct Qdisc *);
	int			(*change)(struct Qdisc *, struct rtattr *arg);

	int			(*dump)(struct Qdisc *, struct sk_buff *);
};
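
/* Editorial sketch (not from the original source): a queueing discipline
 * module fills in a Qdisc_ops table and registers it with register_qdisc(),
 * declared later in this header.  The ops table and callback names below
 * are hypothetical.
 *
 *	static struct Qdisc_ops example_qdisc_ops = {
 *		NULL,			// next, filled in by register_qdisc
 *		NULL,			// cl_ops: NULL for a classless qdisc
 *		"example",		// id, at most IFNAMSIZ bytes
 *		sizeof(struct example_sched_data),
 *		example_enqueue,
 *		example_dequeue,
 *		example_requeue,
 *		example_drop,
 *		example_init,
 *		example_reset,
 *		example_destroy,
 *		example_change,
 *		example_dump,
 *	};
 *
 *	// in the module init path:
 *	//	register_qdisc(&example_qdisc_ops);
 */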

extern rwlock_t qdisc_tree_lock;

struct Qdisc
{
	int 			(*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
	struct sk_buff *	(*dequeue)(struct Qdisc *dev);
	unsigned		flags;
#define TCQ_F_BUILTIN	1
#define TCQ_F_THROTTLED	2
#define TCQ_F_INGRESS	4
	struct Qdisc_ops	*ops;
	u32			handle;
	u32			parent;
	atomic_t		refcnt;
	struct sk_buff_head	q;
	struct net_device	*dev;
	struct list_head	list;

	struct tc_stats		stats;
	int			(*reshape_fail)(struct sk_buff *skb, struct Qdisc *q);

	/* This field is deprecated, but it is still used by CBQ
	 * and it will live until a better solution is invented.
	 */
	struct Qdisc		*__parent;

	char			data[0];
};

struct qdisc_rate_table
{
	struct tc_ratespec rate;
	u32		data[256];
	struct qdisc_rate_table *next;
	int		refcnt;
};
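
/* Editorial sketch (not part of the original API): data[] maps a packet
 * length, scaled down by rate.cell_log, to its transmission time in the
 * scheduler's internal clock units; individual qdiscs (e.g. TBF, CBQ)
 * open-code this "length to time" lookup.  The helper name below is
 * hypothetical.
 */
static inline u32 qdisc_rtab_l2t_example(struct qdisc_rate_table *rtab,
					 unsigned int pktlen)
{
	/* caller is assumed to keep pktlen>>cell_log within the 256 slots */
	return rtab->data[pktlen >> rtab->rate.cell_log];
}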

static inline void sch_tree_lock(struct Qdisc *q)
{
	write_lock(&qdisc_tree_lock);
	spin_lock_bh(&q->dev->queue_lock);
}

static inline void sch_tree_unlock(struct Qdisc *q)
{
	spin_unlock_bh(&q->dev->queue_lock);
	write_unlock(&qdisc_tree_lock);
}

static inline void tcf_tree_lock(struct tcf_proto *tp)
{
	write_lock(&qdisc_tree_lock);
	spin_lock_bh(&tp->q->dev->queue_lock);
}

static inline void tcf_tree_unlock(struct tcf_proto *tp)
{
	spin_unlock_bh(&tp->q->dev->queue_lock);
	write_unlock(&qdisc_tree_lock);
}


static inline unsigned long
cls_set_class(struct tcf_proto *tp, unsigned long *clp, unsigned long cl)
{
	unsigned long old_cl;

	tcf_tree_lock(tp);
	old_cl = *clp;
	*clp = cl;
	tcf_tree_unlock(tp);
	return old_cl;
}

static inline unsigned long
__cls_set_class(unsigned long *clp, unsigned long cl)
{
	unsigned long old_cl;

	old_cl = *clp;
	*clp = cl;
	return old_cl;
}


/*
   Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth

   Normal IP packet size ~ 512 bytes, hence:

   0.5 Kbyte / 1 Mbyte/sec = 0.5 msec, so we need a 50 usec timer for
   10 Mbit ethernet.

   10 msec resolution -> < 50 Kbit/sec.

   The result: a [34]86 is not a good choice for a QoS router :-(

   Things are not so bad, though, because we may use an artificial
   clock, evaluated by integration of the network data flow,
   in the most critical places.

   Note: we do not use fastgettimeofday.
   The reason is that, when it is not the same thing as
   gettimeofday, it returns an invalid timestamp which is
   not updated while net_bh is active.

   So, use PSCHED_CLOCK_SOURCE = PSCHED_CPU on alphas, and on pentiums
   with rdtsc; use PSCHED_JIFFIES on all other architectures, including
   [34]86s and pentiums without rdtsc.
   You can use PSCHED_GETTIMEOFDAY on other architectures
   which have a fast and precise clock source, but it is too expensive.
 */
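
/* Editorial worked example, extending the figures above to a faster link:
 * a 512 byte packet on 100 Mbit ethernet takes 512*8 / 100e6 sec ~= 41 usec
 * to serialize, so by the 10% rule the scheduler clock should resolve to
 * roughly 4 usec; jiffies are far too coarse there and PSCHED_CPU becomes
 * attractive.
 */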

/* General note about internal clock.

   Any clock source returns time intervals, measured in units
   close to 1 usec. With source PSCHED_GETTIMEOFDAY it is precisely
   microseconds; otherwise it is something close but different, chosen
   to minimize arithmetic cost. The ratio usec/internal units, in the
   form numerator/denominator, may be read from /proc/net/psched.
 */


#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY

typedef struct timeval	psched_time_t;
typedef long		psched_tdiff_t;

#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ))
#define PSCHED_JIFFIE2US(delay) ((delay)*(1000000/HZ))

#define PSCHED_EXPORTLIST EXPORT_SYMBOL(psched_tod_diff);

#else /* PSCHED_CLOCK_SOURCE != PSCHED_GETTIMEOFDAY */

#define PSCHED_EXPORTLIST PSCHED_EXPORTLIST_1 PSCHED_EXPORTLIST_2

typedef u64	psched_time_t;
typedef long	psched_tdiff_t;

extern psched_time_t	psched_time_base;

#if PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES

#if HZ < 96
#define PSCHED_JSCALE 14
#elif HZ >= 96 && HZ < 192
#define PSCHED_JSCALE 13
#elif HZ >= 192 && HZ < 384
#define PSCHED_JSCALE 12
#elif HZ >= 384 && HZ < 768
#define PSCHED_JSCALE 11
#elif HZ >= 768
#define PSCHED_JSCALE 10
#endif

#define PSCHED_EXPORTLIST_2

#if BITS_PER_LONG <= 32

#define PSCHED_WATCHER unsigned long

extern PSCHED_WATCHER psched_time_mark;

#define PSCHED_GET_TIME(stamp) ((stamp) = psched_time_base + (((unsigned long)(jiffies-psched_time_mark))<<PSCHED_JSCALE))

#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \
                            EXPORT_SYMBOL(psched_time_mark);

#else

#define PSCHED_GET_TIME(stamp) ((stamp) = (jiffies<<PSCHED_JSCALE))

#define PSCHED_EXPORTLIST_1

#endif

#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE)
#define PSCHED_JIFFIE2US(delay) ((delay)<<PSCHED_JSCALE)

#elif PSCHED_CLOCK_SOURCE == PSCHED_CPU

extern psched_tdiff_t psched_clock_per_hz;
extern int psched_clock_scale;

#define PSCHED_EXPORTLIST_2 EXPORT_SYMBOL(psched_clock_per_hz); \
                            EXPORT_SYMBOL(psched_clock_scale);

#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
#define PSCHED_JIFFIE2US(delay) ((delay)*psched_clock_per_hz)

#ifdef CONFIG_X86_TSC

#define PSCHED_GET_TIME(stamp) \
({ u64 __cur; \
   rdtscll(__cur); \
   (stamp) = __cur>>psched_clock_scale; \
})

#define PSCHED_EXPORTLIST_1

#elif defined (__alpha__)

#define PSCHED_WATCHER u32

extern PSCHED_WATCHER psched_time_mark;

#define PSCHED_GET_TIME(stamp) \
({ u32 __res; \
   __asm__ __volatile__ ("rpcc %0" : "=r"(__res)); \
   if (__res <= psched_time_mark) psched_time_base += 0x100000000UL; \
   psched_time_mark = __res; \
   (stamp) = (psched_time_base + __res)>>psched_clock_scale; \
})

#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \
                            EXPORT_SYMBOL(psched_time_mark);

#else

#error PSCHED_CLOCK_SOURCE=PSCHED_CPU is not supported on this arch.

#endif /* ARCH */

#endif /* PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES */

#endif /* PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY */

#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
#define PSCHED_TDIFF(tv1, tv2) \
({ \
	   int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
	   int __delta = (tv1).tv_usec - (tv2).tv_usec; \
	   if (__delta_sec) { \
	           switch (__delta_sec) { \
		   default: \
			   __delta = 0; \
		   case 2: \
			   __delta += 1000000; \
		   case 1: \
			   __delta += 1000000; \
	           } \
	   } \
	   __delta; \
})

extern int psched_tod_diff(int delta_sec, int bound);

#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \
({ \
	   int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
	   int __delta = (tv1).tv_usec - (tv2).tv_usec; \
	   switch (__delta_sec) { \
	   default: \
		   __delta = psched_tod_diff(__delta_sec, bound); guard; break; \
	   case 2: \
		   __delta += 1000000; \
	   case 1: \
		   __delta += 1000000; \
	   case 0: ; \
	   } \
	   __delta; \
})

#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \
				(tv1).tv_sec <= (tv2).tv_sec) || \
				 (tv1).tv_sec < (tv2).tv_sec)

#define PSCHED_TADD2(tv, delta, tv_res) \
({ \
	   int __delta = (tv).tv_usec + (delta); \
	   (tv_res).tv_sec = (tv).tv_sec; \
	   if (__delta > 1000000) { (tv_res).tv_sec++; __delta -= 1000000; } \
	   (tv_res).tv_usec = __delta; \
})

#define PSCHED_TADD(tv, delta) \
({ \
	   (tv).tv_usec += (delta); \
	   if ((tv).tv_usec > 1000000) { (tv).tv_sec++; \
		 (tv).tv_usec -= 1000000; } \
})

/* Set/check that time is in the "past perfect";
   it depends on the concrete representation of system time
 */

#define PSCHED_SET_PASTPERFECT(t)	((t).tv_sec = 0)
#define PSCHED_IS_PASTPERFECT(t)	((t).tv_sec == 0)

#define	PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; })

#else

#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \
({ \
	   long long __delta = (tv1) - (tv2); \
	   if (__delta > (long long)(bound)) { __delta = (bound); guard; } \
	   __delta; \
})


#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2))
#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta))
#define PSCHED_TADD(tv, delta) ((tv) += (delta))
#define PSCHED_SET_PASTPERFECT(t)	((t) = 0)
#define PSCHED_IS_PASTPERFECT(t)	((t) == 0)
#define	PSCHED_AUDIT_TDIFF(t)

#endif
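
/* Editorial sketch (not part of the original API): typical use of the
 * PSCHED_* primitives by a qdisc that remembers when it last ran and
 * wants the elapsed time, clamped to "bound" internal units.  The helper
 * name is hypothetical; a delay in these units can then be converted to
 * jiffies for a watchdog timer with PSCHED_US2JIFFIE().
 */
static inline long psched_elapsed_example(psched_time_t *last, long bound)
{
	psched_time_t now;
	long delta;

	PSCHED_GET_TIME(now);
	/* the guard statement is a no-op here: we only want the clamping */
	delta = PSCHED_TDIFF_SAFE(now, *last, bound, (void)0);
	*last = now;
	return delta;
}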

struct tcf_police
{
	struct tcf_police *next;
	int		refcnt;
	u32		index;

	int		action;
	int		result;
	u32		ewma_rate;
	u32		burst;
	u32		mtu;

	u32		toks;
	u32		ptoks;
	psched_time_t	t_c;
	spinlock_t	lock;
	struct qdisc_rate_table *R_tab;
	struct qdisc_rate_table *P_tab;

	struct tc_stats	stats;
};

extern int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st);
extern void tcf_police_destroy(struct tcf_police *p);
extern struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est);
extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p);
extern int tcf_police(struct sk_buff *skb, struct tcf_police *p);

static inline void tcf_police_release(struct tcf_police *p)
{
	if (p && --p->refcnt == 0)
		tcf_police_destroy(p);
}

extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_qdisc_ops;
extern struct Qdisc_ops bfifo_qdisc_ops;

int register_qdisc(struct Qdisc_ops *qops);
int unregister_qdisc(struct Qdisc_ops *qops);
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
void dev_init_scheduler(struct net_device *dev);
void dev_shutdown(struct net_device *dev);
void dev_activate(struct net_device *dev);
void dev_deactivate(struct net_device *dev);
void qdisc_reset(struct Qdisc *qdisc);
void qdisc_destroy(struct Qdisc *qdisc);
struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops);
int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt);
void qdisc_kill_estimator(struct tc_stats *stats);
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab);
void qdisc_put_rtab(struct qdisc_rate_table *tab);
int teql_init(void);
int tc_filter_init(void);
int pktsched_init(void);

extern int qdisc_restart(struct net_device *dev);

static inline void qdisc_run(struct net_device *dev)
{
	while (!netif_queue_stopped(dev) &&
	       qdisc_restart(dev)<0)
		/* NOTHING */;
}

/* Calculate the maximal size of a packet seen by the hard_start_xmit
   routine of this device.
 */
static inline unsigned psched_mtu(struct net_device *dev)
{
	unsigned mtu = dev->mtu;
	return dev->hard_header ? mtu + dev->hard_header_len : mtu;
}

#endif