1 #ifndef __NET_PKT_SCHED_H
2 #define __NET_PKT_SCHED_H
3
4 #define PSCHED_GETTIMEOFDAY 1
5 #define PSCHED_JIFFIES 2
6 #define PSCHED_CPU 3
7
8 #define PSCHED_CLOCK_SOURCE PSCHED_JIFFIES
9
10 #include <linux/config.h>
11 #include <linux/types.h>
12 #include <linux/pkt_sched.h>
13 #include <net/pkt_cls.h>
14
15 #ifdef CONFIG_X86_TSC
16 #include <asm/msr.h>
17 #endif
18
19 struct rtattr;
20 struct Qdisc;
21
/* Iteration cursor handed to Qdisc_class_ops->walk() when enumerating
 * the classes of a classful qdisc.
 */
struct qdisc_walker
{
	int	stop;	/* non-zero aborts the walk (presumably set by fn; verify against callers) */
	int	skip;	/* number of leading classes to pass over before invoking fn -- assumed, confirm */
	int	count;	/* running count of classes visited so far */
	int	(*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);	/* per-class callback */
};
29
/* Operations a classful queueing discipline exposes for manipulating
 * its classes, grafting child qdiscs and chaining packet classifiers.
 * The "unsigned long" class arguments are opaque handles obtained from
 * get()/bind_tcf().
 */
struct Qdisc_class_ops
{
	/* Child qdisc manipulation */
	int (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **);
	struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl);

	/* Class manipulation routines */
	unsigned long (*get)(struct Qdisc *, u32 classid);	/* look up class by classid; paired with put() -- presumably refcounted */
	void (*put)(struct Qdisc *, unsigned long);
	int (*change)(struct Qdisc *, u32, u32, struct rtattr **, unsigned long *);	/* create or modify a class */
	int (*delete)(struct Qdisc *, unsigned long);
	void (*walk)(struct Qdisc *, struct qdisc_walker * arg);	/* enumerate classes, see struct qdisc_walker */

	/* Filter manipulation */
	struct tcf_proto ** (*tcf_chain)(struct Qdisc *, unsigned long);	/* head of the filter chain for a class */
	unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid);
	void (*unbind_tcf)(struct Qdisc *, unsigned long);

	/* rtnetlink specific */
	int (*dump)(struct Qdisc *, unsigned long, struct sk_buff *skb, struct tcmsg*);	/* dump one class into skb */
};
51
/* Operations implemented by a queueing discipline.  id names the
 * discipline; priv_size is the size of per-instance private data that
 * follows struct Qdisc (its data[] member).
 */
struct Qdisc_ops
{
	struct Qdisc_ops	*next;		/* linkage in the list of registered qdisc ops (see register_qdisc) */
	struct Qdisc_class_ops	*cl_ops;	/* class operations; presumably NULL for classless disciplines */
	char			id[IFNAMSIZ];
	int			priv_size;

	int (*enqueue)(struct sk_buff *, struct Qdisc *);
	struct sk_buff * (*dequeue)(struct Qdisc *);
	int (*requeue)(struct sk_buff *, struct Qdisc *);	/* put back a packet the driver could not accept -- assumed, verify */
	unsigned int (*drop)(struct Qdisc *);

	int (*init)(struct Qdisc *, struct rtattr *arg);	/* arg: rtnetlink-supplied configuration */
	void (*reset)(struct Qdisc *);
	void (*destroy)(struct Qdisc *);
	int (*change)(struct Qdisc *, struct rtattr *arg);	/* reconfigure a live instance */

	int (*dump)(struct Qdisc *, struct sk_buff *);		/* dump configuration to rtnetlink skb */
};
71
72 extern rwlock_t qdisc_tree_lock;
73
/* A queueing discipline instance attached to a network device.
 * Per-discipline private state (ops->priv_size bytes) lives in data[].
 */
struct Qdisc
{
	int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev);	/* fast-path hooks; presumably copies of ops->enqueue/dequeue */
	struct sk_buff * (*dequeue)(struct Qdisc *dev);
	unsigned flags;
#define TCQ_F_BUILTIN 1		/* built-in instance (e.g. noop_qdisc) -- assumed from name, verify */
#define TCQ_F_THROTTLED 2	/* dequeue currently blocked (rate limiting) -- assumed from name */
#define TCQ_F_INGRESS 4		/* attached on the ingress side -- assumed from name */
	struct Qdisc_ops	*ops;
	u32			handle;		/* identifier of this qdisc */
	u32			parent;		/* handle of the parent qdisc/class */
	atomic_t		refcnt;
	struct sk_buff_head	q;		/* default packet queue */
	struct net_device	*dev;
	struct list_head	list;

	struct tc_stats		stats;
	int			(*reshape_fail)(struct sk_buff *skb, struct Qdisc *q);

	/* This field is deprecated, but it is still used by CBQ
	 * and it will live until a better solution is invented.
	 */
	struct Qdisc		*__parent;

	char			data[0];	/* zero-length array (GNU extension): per-discipline private area */
};
100
/* Shared, reference-counted rate table for rate "rate".  data[256]
 * presumably maps a packet-size index to a transmission-time value --
 * confirm against qdisc_get_rtab()/users.  Tables are chained via next.
 */
struct qdisc_rate_table
{
	struct tc_ratespec rate;
	u32		data[256];
	struct qdisc_rate_table *next;
	int		refcnt;
};
108
/* Serialize modifications of a device's qdisc tree: take the global
 * qdisc tree lock first, then the per-device TX queue lock with bottom
 * halves disabled.  sch_tree_unlock() must release in reverse order.
 */
static inline void sch_tree_lock(struct Qdisc *q)
{
	write_lock(&qdisc_tree_lock);
	spin_lock_bh(&q->dev->queue_lock);
}
114
/* Release the locks taken by sch_tree_lock(), in reverse order:
 * device queue lock first, then the global qdisc tree lock.
 */
static inline void sch_tree_unlock(struct Qdisc *q)
{
	spin_unlock_bh(&q->dev->queue_lock);
	write_unlock(&qdisc_tree_lock);
}
120
/* Same locking as sch_tree_lock(), but starting from a classifier:
 * reaches the device through tp->q->dev.  Lock order: global tree
 * lock, then device queue lock (BH disabled).
 */
static inline void tcf_tree_lock(struct tcf_proto *tp)
{
	write_lock(&qdisc_tree_lock);
	spin_lock_bh(&tp->q->dev->queue_lock);
}
126
/* Release the locks taken by tcf_tree_lock(), in reverse order. */
static inline void tcf_tree_unlock(struct tcf_proto *tp)
{
	spin_unlock_bh(&tp->q->dev->queue_lock);
	write_unlock(&qdisc_tree_lock);
}
132
133
/* Replace the class handle stored in *clp with cl while holding the
 * qdisc tree locks for tp, and return the handle that was there before.
 */
static inline unsigned long
cls_set_class(struct tcf_proto *tp, unsigned long *clp, unsigned long cl)
{
	unsigned long prev;

	tcf_tree_lock(tp);
	prev = *clp;
	*clp = cl;
	tcf_tree_unlock(tp);

	return prev;
}
145
/* Lockless variant of cls_set_class(): swap the new class handle into
 * *clp and hand back the previous one.  The caller is responsible for
 * any serialization required.
 */
static inline unsigned long
__cls_set_class(unsigned long *clp, unsigned long cl)
{
	unsigned long prev = *clp;

	*clp = cl;
	return prev;
}
155
156
157 /*
158 Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth
159
160 Normal IP packet size ~ 512byte, hence:
161
162 0.5Kbyte/1Mbyte/sec = 0.5msec, so that we need 50usec timer for
163 10Mbit ethernet.
164
165 10msec resolution -> <50Kbit/sec.
166
167 The result: [34]86 is not good choice for QoS router :-(
168
   Things are not so bad, because we may use an artificial
   clock evaluated by integration of network data flow
   in the most critical places.
172
173 Note: we do not use fastgettimeofday.
174 The reason is that, when it is not the same thing as
175 gettimeofday, it returns invalid timestamp, which is
176 not updated, when net_bh is active.
177
   So, use PSCHED_CLOCK_SOURCE = PSCHED_CPU on alpha and pentiums
   with rdtsc. And PSCHED_JIFFIES on all other architectures, including [34]86
   and pentiums without rdtsc.
   You can use PSCHED_GETTIMEOFDAY on other architectures,
   which have a fast and precise clock source, but it is too expensive.
183 */
184
185 /* General note about internal clock.
186
   Any clock source returns time intervals, measured in units
   close to 1usec. With source PSCHED_GETTIMEOFDAY it is precisely
   microseconds, otherwise something close but different, chosen to minimize
   arithmetic cost. The ratio usec/internal units, in the form numerator/denominator,
   may be read from /proc/net/psched.
192 */
193
194
#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY

/* Wall-clock source: timestamps are struct timeval and the internal
 * unit is exactly one microsecond.
 */
typedef struct timeval	psched_time_t;
typedef long		psched_tdiff_t;

#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
/* Round up when converting microseconds to jiffies. */
#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ))
#define PSCHED_JIFFIE2US(delay) ((delay)*(1000000/HZ))

#define PSCHED_EXPORTLIST EXPORT_SYMBOL(psched_tod_diff);

#else /* PSCHED_CLOCK_SOURCE != PSCHED_GETTIMEOFDAY */

#define PSCHED_EXPORTLIST PSCHED_EXPORTLIST_1 PSCHED_EXPORTLIST_2

/* 64-bit monotonic timestamp in scaled internal units (close to 1usec,
 * see the note above about /proc/net/psched).
 */
typedef u64	psched_time_t;
typedef long	psched_tdiff_t;

extern psched_time_t	psched_time_base;

#if PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES

/* Pick PSCHED_JSCALE so that one jiffie shifted left by it lands close
 * to the number of microseconds per tick, keeping internal units near
 * 1usec regardless of HZ.
 */
#if HZ < 96
#define PSCHED_JSCALE 14
#elif HZ >= 96 && HZ < 192
#define PSCHED_JSCALE 13
#elif HZ >= 192 && HZ < 384
#define PSCHED_JSCALE 12
#elif HZ >= 384 && HZ < 768
#define PSCHED_JSCALE 11
#elif HZ >= 768
#define PSCHED_JSCALE 10
#endif

#define PSCHED_EXPORTLIST_2

#if BITS_PER_LONG <= 32

#define PSCHED_WATCHER unsigned long

extern PSCHED_WATCHER psched_time_mark;

/* On 32-bit, jiffies wraps: extend it to 64 bits using psched_time_base
 * plus the delta since the last mark (base/mark are maintained outside
 * this header -- presumably by the scheduler core).
 */
#define PSCHED_GET_TIME(stamp) ((stamp) = psched_time_base + (((unsigned long)(jiffies-psched_time_mark))<<PSCHED_JSCALE))

#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \
                            EXPORT_SYMBOL(psched_time_mark);

#else

/* 64-bit jiffies effectively never wrap: scale them directly. */
#define PSCHED_GET_TIME(stamp) ((stamp) = (jiffies<<PSCHED_JSCALE))

#define PSCHED_EXPORTLIST_1

#endif

/* Round up when converting internal units to jiffies. */
#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE)
#define PSCHED_JIFFIE2US(delay) ((delay)<<PSCHED_JSCALE)
253 #elif PSCHED_CLOCK_SOURCE == PSCHED_CPU
254
255 extern psched_tdiff_t psched_clock_per_hz;
256 extern int psched_clock_scale;
257
258 #define PSCHED_EXPORTLIST_2 EXPORT_SYMBOL(psched_clock_per_hz); \
259 EXPORT_SYMBOL(psched_clock_scale);
260
261 #define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
262 #define PSCHED_JIFFIE2US(delay) ((delay)*psched_clock_per_hz)
263
264 #ifdef CONFIG_X86_TSC
265
266 #define PSCHED_GET_TIME(stamp) \
267 ({ u64 __cur; \
268 rdtscll(__cur); \
269 (stamp) = __cur>>psched_clock_scale; \
270 })
271
272 #define PSCHED_EXPORTLIST_1
273
274 #elif defined (__alpha__)
275
276 #define PSCHED_WATCHER u32
277
278 extern PSCHED_WATCHER psched_time_mark;
279
280 #define PSCHED_GET_TIME(stamp) \
281 ({ u32 __res; \
282 __asm__ __volatile__ ("rpcc %0" : "r="(__res)); \
283 if (__res <= psched_time_mark) psched_time_base += 0x100000000UL; \
284 psched_time_mark = __res; \
285 (stamp) = (psched_time_base + __res)>>psched_clock_scale; \
286 })
287
288 #define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \
289 EXPORT_SYMBOL(psched_time_mark);
290
291 #else
292
293 #error PSCHED_CLOCK_SOURCE=PSCHED_CPU is not supported on this arch.
294
295 #endif /* ARCH */
296
297 #endif /* PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES */
298
299 #endif /* PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY */
300
#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
/* tv1 - tv2 in microseconds.  Gaps of 1 or 2 seconds are computed
 * exactly via the deliberate switch fallthroughs; any larger (or
 * negative) second difference saturates to 2000000.
 */
#define PSCHED_TDIFF(tv1, tv2) \
({ \
	   int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
	   int __delta = (tv1).tv_usec - (tv2).tv_usec; \
	   if (__delta_sec) { \
		   switch (__delta_sec) { \
		   default: \
			   __delta = 0; /* fall through */ \
		   case 2: \
			   __delta += 1000000; /* fall through */ \
		   case 1: \
			   __delta += 1000000; \
		   } \
	   } \
	   __delta; \
})

extern int psched_tod_diff(int delta_sec, int bound);

/* As PSCHED_TDIFF, but gaps beyond 2 seconds are handled by
 * psched_tod_diff(__delta_sec, bound) and execute the guard statement.
 */
#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \
({ \
	   int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
	   int __delta = (tv1).tv_usec - (tv2).tv_usec; \
	   switch (__delta_sec) { \
	   default: \
		   __delta = psched_tod_diff(__delta_sec, bound); guard; break; \
	   case 2: \
		   __delta += 1000000; /* fall through */ \
	   case 1: \
		   __delta += 1000000; /* fall through */ \
	   case 0: ; \
	   } \
	   __delta; \
})

/* Strict "tv1 is earlier than tv2" comparison on timevals. */
#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \
				(tv1).tv_sec <= (tv2).tv_sec) || \
				(tv1).tv_sec < (tv2).tv_sec)

/* tv_res = tv + delta (usec).  NOTE(review): performs at most one
 * borrow, so it assumes delta < 1 sec; and with '>' rather than '>='
 * a sum of exactly 1000000 leaves tv_usec unnormalized -- confirm
 * callers tolerate both.
 */
#define PSCHED_TADD2(tv, delta, tv_res) \
({ \
	   int __delta = (tv).tv_usec + (delta); \
	   (tv_res).tv_sec = (tv).tv_sec; \
	   if (__delta > 1000000) { (tv_res).tv_sec++; __delta -= 1000000; } \
	   (tv_res).tv_usec = __delta; \
})

/* In-place variant of PSCHED_TADD2; same normalization caveats. */
#define PSCHED_TADD(tv, delta) \
({ \
	   (tv).tv_usec += (delta); \
	   if ((tv).tv_usec > 1000000) { (tv).tv_sec++; \
		(tv).tv_usec -= 1000000; } \
})

/* Set/check that time is in the "past perfect";
   it depends on the concrete representation of system time
 */

#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0)
#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0)

/* Clamp a time difference to at most 2 seconds. */
#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; })

#else

/* Scalar (u64) clock sources: plain arithmetic versions of the above. */
#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
/* Clamp the difference to bound, executing guard when clamped. */
#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \
({ \
	   long long __delta = (tv1) - (tv2); \
	   if ( __delta > (long long)(bound)) {  __delta = (bound); guard; } \
	   __delta; \
})


#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2))
#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta))
#define PSCHED_TADD(tv, delta) ((tv) += (delta))
#define PSCHED_SET_PASTPERFECT(t) ((t) = 0)
#define PSCHED_IS_PASTPERFECT(t) ((t) == 0)
#define PSCHED_AUDIT_TDIFF(t)

#endif
384
/* A traffic policer instance (token-bucket style, judging by toks/ptoks
 * and the R_tab/P_tab rate tables -- verify against net/sched police
 * code).  Shared and reference-counted.
 */
struct tcf_police
{
	struct tcf_police *next;	/* chain linkage (presumably a lookup chain keyed by index) */
	int		refcnt;
	u32		index;		/* external identifier used by tcf_police_locate() -- assumed */

	int		action;		/* verdict applied when traffic exceeds the profile -- assumed from name */
	int		result;
	u32		ewma_rate;
	u32		burst;
	u32		mtu;

	u32		toks;		/* current tokens (rate bucket) -- assumed */
	u32		ptoks;		/* current tokens (peak-rate bucket) -- assumed */
	psched_time_t	t_c;		/* timestamp of last update -- assumed */
	spinlock_t	lock;
	struct qdisc_rate_table *R_tab;	/* rate table */
	struct qdisc_rate_table *P_tab;	/* peak-rate table */

	struct tc_stats	stats;
};
406
/* Policer / stats helpers implemented elsewhere in the scheduler core. */
extern int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st);
extern void tcf_police_destroy(struct tcf_police *p);
extern struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est);
extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p);
extern int tcf_police(struct sk_buff *skb, struct tcf_police *p);
412
tcf_police_release(struct tcf_police * p)413 static inline void tcf_police_release(struct tcf_police *p)
414 {
415 if (p && --p->refcnt == 0)
416 tcf_police_destroy(p);
417 }
418
/* Built-in queueing disciplines provided by the scheduler core. */
extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_qdisc_ops;
extern struct Qdisc_ops bfifo_qdisc_ops;

/* Registration of queueing-discipline implementations. */
int register_qdisc(struct Qdisc_ops *qops);
int unregister_qdisc(struct Qdisc_ops *qops);
/* Look up a qdisc (or a qdisc owning a class) by 32-bit handle. */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
/* Per-device scheduler lifecycle hooks. */
void dev_init_scheduler(struct net_device *dev);
void dev_shutdown(struct net_device *dev);
void dev_activate(struct net_device *dev);
void dev_deactivate(struct net_device *dev);
/* Qdisc instance lifecycle. */
void qdisc_reset(struct Qdisc *qdisc);
void qdisc_destroy(struct Qdisc *qdisc);
struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops);
/* Rate estimators attached to a tc_stats block. */
int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt);
void qdisc_kill_estimator(struct tc_stats *stats);
/* Shared, refcounted rate tables (see struct qdisc_rate_table). */
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab);
void qdisc_put_rtab(struct qdisc_rate_table *tab);
/* Subsystem initializers. */
int teql_init(void);
int tc_filter_init(void);
int pktsched_init(void);

extern int qdisc_restart(struct net_device *dev);
444
/* Drain the device's qdisc: keep calling qdisc_restart() until the
 * driver stops the queue or qdisc_restart() returns >= 0.
 */
static inline void qdisc_run(struct net_device *dev)
{
	for (;;) {
		if (netif_queue_stopped(dev))
			return;
		if (qdisc_restart(dev) >= 0)
			return;
	}
}
451
452 /* Calculate maximal size of packet seen by hard_start_xmit
453 routine of this device.
454 */
psched_mtu(struct net_device * dev)455 static inline unsigned psched_mtu(struct net_device *dev)
456 {
457 unsigned mtu = dev->mtu;
458 return dev->hard_header ? mtu + dev->hard_header_len : mtu;
459 }
460
461 #endif
462