1 /*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <asm/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
33 #include <net/ip.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
36 #include <net/sock.h>
37 #include <net/pkt_sched.h>
38
39
40 /* Class-Based Queueing (CBQ) algorithm.
41 =======================================
42
43 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44 Management Models for Packet Networks",
45 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
46
47 		[2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
48
49 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
50 Parameters", 1996
51
52 [4] Sally Floyd and Michael Speer, "Experimental Results
53 for Class-Based Queueing", 1998, not published.
54
55 -----------------------------------------------------------------------
56
57 	Algorithm skeleton was taken from the NS simulator's cbq.cc.
58 	If someone wants to check this code against the LBL version,
59 	they should take into account that ONLY the skeleton was borrowed;
60 	the implementation is different. In particular:
61
62 --- The WRR algorithm is different. Our version looks more
63 reasonable (I hope) and works when quanta are allowed to be
64 		less than the MTU, which is always the case when real-time classes
65 		have small rates. Note that the statement in [3] is
66 		incomplete: delay may actually be estimated even if the class
67 		per-round allotment is less than the MTU. Namely, if the per-round
68 		allotment is W*r_i, and r_1+...+r_k = r < 1, then
69
70 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
71
72 		In the worst case we have an IntServ estimate with D = W*r+k*MTU
73 		and C = MTU*r. The proof (if it is correct at all) is trivial.
74
75
76 	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
77 		interpret some places, which look like wrong translations
78 		from NS. Anyone is welcome to find these differences
79 		and explain to me why I am wrong 8).
80
81 	--- Linux has no EOI event, so we cannot estimate the true class
82 		idle time. The workaround is to consider the next dequeue event
83 		as a sign that the previous packet has finished. This is wrong because of
84 		internal device queueing, but on a permanently loaded link it holds true.
85 		Moreover, combined with the clock integrator, this scheme looks
86 		very close to an ideal solution. */
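
/* A minimal, hedged sketch (kept under "#if 0", i.e. never compiled) of the
   scaled EWMA idle-time estimator that cbq_update() below implements.
   All names here are illustrative only. With W = 2^{-ewma_log},
   true_avgidle = (1-W)*true_avgidle + W*idle; the code stores
   avgidle == true_avgidle/W, so the update reduces to a shift. */
#if 0
struct ewma_idle_example {
	long		avgidle;	/* scaled average idle time */
	long		minidle;	/* lower clamp (a negative value) */
	long		maxidle;	/* upper clamp */
	unsigned char	ewma_log;	/* W = 2^{-ewma_log} */
};

static void ewma_idle_example_update(struct ewma_idle_example *e, long idle)
{
	/* idle = (now - last) - last_pktlen/rate, as computed in cbq_update() */
	long avgidle = e->avgidle;

	avgidle += idle - (avgidle >> e->ewma_log);

	if (avgidle < e->minidle)
		avgidle = e->minidle;
	else if (avgidle > e->maxidle)
		avgidle = e->maxidle;

	e->avgidle = avgidle;
}
#endif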
87
88 struct cbq_sched_data;
89
90
91 struct cbq_class
92 {
93 struct cbq_class *next; /* hash table link */
94 struct cbq_class *next_alive; /* next class with backlog in this priority band */
95
96 /* Parameters */
97 u32 classid;
98 unsigned char priority; /* class priority */
99 unsigned char priority2; /* priority to be used after overlimit */
100 unsigned char ewma_log; /* time constant for idle time calculation */
101 unsigned char ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103 unsigned char police;
104 #endif
105
106 u32 defmap;
107
108 /* Link-sharing scheduler parameters */
109 	long			maxidle;	/* Class parameters: see below. */
110 long offtime;
111 long minidle;
112 u32 avpkt;
113 struct qdisc_rate_table *R_tab;
114
115 /* Overlimit strategy parameters */
116 void (*overlimit)(struct cbq_class *cl);
117 long penalty;
118
119 /* General scheduler (WRR) parameters */
120 long allot;
121 long quantum; /* Allotment per WRR round */
122 long weight; /* Relative allotment: see below */
123
124 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
125 struct cbq_class *split; /* Ptr to split node */
126 struct cbq_class *share; /* Ptr to LS parent in the class tree */
127 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
128 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
129 parent otherwise */
130 struct cbq_class *sibling; /* Sibling chain */
131 struct cbq_class *children; /* Pointer to children chain */
132
133 struct Qdisc *q; /* Elementary queueing discipline */
134
135
136 /* Variables */
137 unsigned char cpriority; /* Effective priority */
138 unsigned char delayed;
139 unsigned char level; /* level of the class in hierarchy:
140 0 for leaf classes, and maximal
141 level of children + 1 for nodes.
142 */
143
144 psched_time_t last; /* Last end of service */
145 psched_time_t undertime;
146 long avgidle;
147 long deficit; /* Saved deficit for WRR */
148 unsigned long penalized;
149 struct tc_stats stats;
150 struct tc_cbq_xstats xstats;
151
152 struct tcf_proto *filter_list;
153
154 int refcnt;
155 int filters;
156
157 struct cbq_class *defaults[TC_PRIO_MAX+1];
158 };
159
160 struct cbq_sched_data
161 {
162 struct cbq_class *classes[16]; /* Hash table of all classes */
163 int nclasses[TC_CBQ_MAXPRIO+1];
164 unsigned quanta[TC_CBQ_MAXPRIO+1];
165
166 struct cbq_class link;
167
168 unsigned activemask;
169 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
170 with backlog */
171
172 #ifdef CONFIG_NET_CLS_POLICE
173 struct cbq_class *rx_class;
174 #endif
175 struct cbq_class *tx_class;
176 struct cbq_class *tx_borrowed;
177 int tx_len;
178 psched_time_t now; /* Cached timestamp */
179 psched_time_t now_rt; /* Cached real time */
180 unsigned pmask;
181
182 struct timer_list delay_timer;
183 struct timer_list wd_timer; /* Watchdog timer,
184 started when CBQ has
185 backlog, but cannot
186 transmit just now */
187 long wd_expires;
188 int toplevel;
189 u32 hgenerator;
190 };
191
192
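/* L2T ("length to time"): transmission time of a packet of `len' bytes at
   the class rate, looked up in the precomputed rate table, which is indexed
   by the packet length shifted right by the cell logarithm. */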
193 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
194
195
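/* Fold the classid into a 4-bit bucket index for the 16-entry class
   hash table. */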
196 static __inline__ unsigned cbq_hash(u32 h)
197 {
198 h ^= h>>8;
199 h ^= h>>4;
200 return h&0xF;
201 }
202
203 static __inline__ struct cbq_class *
204 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
205 {
206 struct cbq_class *cl;
207
208 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
209 if (cl->classid == classid)
210 return cl;
211 return NULL;
212 }
213
214 #ifdef CONFIG_NET_CLS_POLICE
215
216 static struct cbq_class *
217 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
218 {
219 struct cbq_class *cl, *new;
220
221 for (cl = this->tparent; cl; cl = cl->tparent)
222 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
223 return new;
224
225 return NULL;
226 }
227
228 #endif
229
230 /* Classify packet. The procedure is pretty complicated, but
231    it allows us to combine link sharing and priority scheduling
232    transparently.
233
234    Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
235    so that it resolves to split nodes. Then packets are classified
236    by logical priority, or a more specific classifier may be attached
237    to the split node.
238 */
239
240 static struct cbq_class *
241 cbq_classify(struct sk_buff *skb, struct Qdisc *sch)
242 {
243 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
244 struct cbq_class *head = &q->link;
245 struct cbq_class **defmap;
246 struct cbq_class *cl = NULL;
247 u32 prio = skb->priority;
248 struct tcf_result res;
249
250 /*
251 * Step 1. If skb->priority points to one of our classes, use it.
252 */
253 if (TC_H_MAJ(prio^sch->handle) == 0 &&
254 (cl = cbq_class_lookup(q, prio)) != NULL)
255 return cl;
256
257 for (;;) {
258 int result = 0;
259
260 defmap = head->defaults;
261
262 /*
263 * Step 2+n. Apply classifier.
264 */
265 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
266 goto fallback;
267
268 if ((cl = (void*)res.class) == NULL) {
269 if (TC_H_MAJ(res.classid))
270 cl = cbq_class_lookup(q, res.classid);
271 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
272 cl = defmap[TC_PRIO_BESTEFFORT];
273
274 if (cl == NULL || cl->level >= head->level)
275 goto fallback;
276 }
277
278 #ifdef CONFIG_NET_CLS_POLICE
279 switch (result) {
280 case TC_POLICE_RECLASSIFY:
281 return cbq_reclassify(skb, cl);
282 case TC_POLICE_SHOT:
283 return NULL;
284 default:
285 break;
286 }
287 #endif
288 if (cl->level == 0)
289 return cl;
290
291 /*
292 		 * Step 3+n. If the classifier selected a link-sharing class,
293 		 *	apply the agency-specific classifier.
294 		 *	Repeat this procedure until we hit a leaf node.
295 */
296 head = cl;
297 }
298
299 fallback:
300 cl = head;
301
302 /*
303 * Step 4. No success...
304 */
305 if (TC_H_MAJ(prio) == 0 &&
306 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
307 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
308 return head;
309
310 return cl;
311 }
312
313 /*
314    A packet has just been enqueued to an empty class.
315    cbq_activate_class adds it to the tail of the active class list
316    of its priority band.
317 */
318
319 static __inline__ void cbq_activate_class(struct cbq_class *cl)
320 {
321 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
322 int prio = cl->cpriority;
323 struct cbq_class *cl_tail;
324
325 cl_tail = q->active[prio];
326 q->active[prio] = cl;
327
328 if (cl_tail != NULL) {
329 cl->next_alive = cl_tail->next_alive;
330 cl_tail->next_alive = cl;
331 } else {
332 cl->next_alive = cl;
333 q->activemask |= (1<<prio);
334 }
335 }
336
337 /*
338    Unlink the class from the active chain.
339    Note that the same procedure is done directly in cbq_dequeue*
340    during the round-robin pass.
341 */
342
343 static void cbq_deactivate_class(struct cbq_class *this)
344 {
345 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
346 int prio = this->cpriority;
347 struct cbq_class *cl;
348 struct cbq_class *cl_prev = q->active[prio];
349
350 do {
351 cl = cl_prev->next_alive;
352 if (cl == this) {
353 cl_prev->next_alive = cl->next_alive;
354 cl->next_alive = NULL;
355
356 if (cl == q->active[prio]) {
357 q->active[prio] = cl_prev;
358 if (cl == q->active[prio]) {
359 q->active[prio] = NULL;
360 q->activemask &= ~(1<<prio);
361 return;
362 }
363 }
364
365 cl = cl_prev->next_alive;
366 return;
367 }
368 } while ((cl_prev = cl) != q->active[prio]);
369 }
370
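/* Lower q->toplevel to the level of the first class in cl's borrow chain
   that is currently underlimit.  The toplevel cutoff bounds how far up the
   tree classes are allowed to borrow bandwidth. */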
371 static void
372 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
373 {
374 int toplevel = q->toplevel;
375
376 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
377 psched_time_t now;
378 psched_tdiff_t incr;
379
380 PSCHED_GET_TIME(now);
381 incr = PSCHED_TDIFF(now, q->now_rt);
382 PSCHED_TADD2(q->now, incr, now);
383
384 do {
385 if (PSCHED_TLESS(cl->undertime, now)) {
386 q->toplevel = cl->level;
387 return;
388 }
389 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
390 }
391 }
392
393 static int
394 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
395 {
396 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
397 struct cbq_class *cl = cbq_classify(skb, sch);
398 int len = skb->len;
399 int ret = NET_XMIT_POLICED;
400
401 #ifdef CONFIG_NET_CLS_POLICE
402 q->rx_class = cl;
403 #endif
404 if (cl) {
405 #ifdef CONFIG_NET_CLS_POLICE
406 cl->q->__parent = sch;
407 #endif
408 if ((ret = cl->q->enqueue(skb, cl->q)) == 0) {
409 sch->q.qlen++;
410 sch->stats.packets++;
411 sch->stats.bytes+=len;
412 cbq_mark_toplevel(q, cl);
413 if (!cl->next_alive)
414 cbq_activate_class(cl);
415 return 0;
416 }
417 }
418
419 sch->stats.drops++;
420 if (cl == NULL)
421 kfree_skb(skb);
422 else {
423 cbq_mark_toplevel(q, cl);
424 cl->stats.drops++;
425 }
426 return ret;
427 }
428
429 static int
430 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
431 {
432 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
433 struct cbq_class *cl;
434 int ret;
435
436 if ((cl = q->tx_class) == NULL) {
437 kfree_skb(skb);
438 sch->stats.drops++;
439 return NET_XMIT_CN;
440 }
441 q->tx_class = NULL;
442
443 cbq_mark_toplevel(q, cl);
444
445 #ifdef CONFIG_NET_CLS_POLICE
446 q->rx_class = cl;
447 cl->q->__parent = sch;
448 #endif
449 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
450 sch->q.qlen++;
451 if (!cl->next_alive)
452 cbq_activate_class(cl);
453 return 0;
454 }
455 sch->stats.drops++;
456 cl->stats.drops++;
457 return ret;
458 }
459
460 /* Overlimit actions */
461
462 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
463
464 static void cbq_ovl_classic(struct cbq_class *cl)
465 {
466 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
467 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
468
469 if (!cl->delayed) {
470 delay += cl->offtime;
471
472 /*
473 		   The class goes to sleep, so it will have no
474 		   chance to work off its negative avgidle. Let's forgive it 8)
475
476 		   BTW cbq-2.0 has a bug in this
477 		   place: apparently they forgot to shift it by cl->ewma_log.
478 */
479 if (cl->avgidle < 0)
480 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
481 if (cl->avgidle < cl->minidle)
482 cl->avgidle = cl->minidle;
483 if (delay <= 0)
484 delay = 1;
485 PSCHED_TADD2(q->now, delay, cl->undertime);
486
487 cl->xstats.overactions++;
488 cl->delayed = 1;
489 }
490 if (q->wd_expires == 0 || q->wd_expires > delay)
491 q->wd_expires = delay;
492
493 /* Dirty work! We must schedule wakeups based on
494 real available rate, rather than leaf rate,
495 which may be tiny (even zero).
496 */
497 if (q->toplevel == TC_CBQ_MAXLEVEL) {
498 struct cbq_class *b;
499 psched_tdiff_t base_delay = q->wd_expires;
500
501 for (b = cl->borrow; b; b = b->borrow) {
502 delay = PSCHED_TDIFF(b->undertime, q->now);
503 if (delay < base_delay) {
504 if (delay <= 0)
505 delay = 1;
506 base_delay = delay;
507 }
508 }
509
510 q->wd_expires = base_delay;
511 }
512 }
513
514 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
515    when they go overlimit
516 */
517
518 static void cbq_ovl_rclassic(struct cbq_class *cl)
519 {
520 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
521 struct cbq_class *this = cl;
522
523 do {
524 if (cl->level > q->toplevel) {
525 cl = NULL;
526 break;
527 }
528 } while ((cl = cl->borrow) != NULL);
529
530 if (cl == NULL)
531 cl = this;
532 cbq_ovl_classic(cl);
533 }
534
535 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
536
537 static void cbq_ovl_delay(struct cbq_class *cl)
538 {
539 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
540 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
541
542 if (!cl->delayed) {
543 unsigned long sched = jiffies;
544
545 delay += cl->offtime;
546 if (cl->avgidle < 0)
547 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
548 if (cl->avgidle < cl->minidle)
549 cl->avgidle = cl->minidle;
550 PSCHED_TADD2(q->now, delay, cl->undertime);
551
552 if (delay > 0) {
553 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
554 cl->penalized = sched;
555 cl->cpriority = TC_CBQ_MAXPRIO;
556 q->pmask |= (1<<TC_CBQ_MAXPRIO);
557 if (del_timer(&q->delay_timer) &&
558 (long)(q->delay_timer.expires - sched) > 0)
559 q->delay_timer.expires = sched;
560 add_timer(&q->delay_timer);
561 cl->delayed = 1;
562 cl->xstats.overactions++;
563 return;
564 }
565 delay = 1;
566 }
567 if (q->wd_expires == 0 || q->wd_expires > delay)
568 q->wd_expires = delay;
569 }
570
571 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
572
573 static void cbq_ovl_lowprio(struct cbq_class *cl)
574 {
575 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
576
577 cl->penalized = jiffies + cl->penalty;
578
579 if (cl->cpriority != cl->priority2) {
580 cl->cpriority = cl->priority2;
581 q->pmask |= (1<<cl->cpriority);
582 cl->xstats.overactions++;
583 }
584 cbq_ovl_classic(cl);
585 }
586
587 /* TC_CBQ_OVL_DROP: penalize class by dropping */
588
589 static void cbq_ovl_drop(struct cbq_class *cl)
590 {
591 if (cl->q->ops->drop)
592 if (cl->q->ops->drop(cl->q))
593 cl->qdisc->q.qlen--;
594 cl->xstats.overactions++;
595 cbq_ovl_classic(cl);
596 }
597
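/* Watchdog: fires when CBQ was throttled while still having backlog;
   clear the throttle flag and kick the device so that dequeue is retried. */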
598 static void cbq_watchdog(unsigned long arg)
599 {
600 struct Qdisc *sch = (struct Qdisc*)arg;
601
602 sch->flags &= ~TCQ_F_THROTTLED;
603 netif_schedule(sch->dev);
604 }
605
606 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
607 {
608 struct cbq_class *cl;
609 struct cbq_class *cl_prev = q->active[prio];
610 unsigned long now = jiffies;
611 unsigned long sched = now;
612
613 if (cl_prev == NULL)
614 return now;
615
616 do {
617 cl = cl_prev->next_alive;
618 if ((long)(now - cl->penalized) > 0) {
619 cl_prev->next_alive = cl->next_alive;
620 cl->next_alive = NULL;
621 cl->cpriority = cl->priority;
622 cl->delayed = 0;
623 cbq_activate_class(cl);
624
625 if (cl == q->active[prio]) {
626 q->active[prio] = cl_prev;
627 if (cl == q->active[prio]) {
628 q->active[prio] = NULL;
629 return 0;
630 }
631 }
632
633 cl = cl_prev->next_alive;
634 } else if ((long)(sched - cl->penalized) > 0)
635 sched = cl->penalized;
636 } while ((cl_prev = cl) != q->active[prio]);
637
638 return (long)(sched - now);
639 }
640
641 static void cbq_undelay(unsigned long arg)
642 {
643 struct Qdisc *sch = (struct Qdisc*)arg;
644 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
645 long delay = 0;
646 unsigned pmask;
647
648 pmask = q->pmask;
649 q->pmask = 0;
650
651 while (pmask) {
652 int prio = ffz(~pmask);
653 long tmp;
654
655 pmask &= ~(1<<prio);
656
657 tmp = cbq_undelay_prio(q, prio);
658 if (tmp > 0) {
659 q->pmask |= 1<<prio;
660 if (tmp < delay || delay == 0)
661 delay = tmp;
662 }
663 }
664
665 if (delay) {
666 q->delay_timer.expires = jiffies + delay;
667 add_timer(&q->delay_timer);
668 }
669
670 sch->flags &= ~TCQ_F_THROTTLED;
671 netif_schedule(sch->dev);
672 }
673
674
675 #ifdef CONFIG_NET_CLS_POLICE
676
677 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
678 {
679 int len = skb->len;
680 struct Qdisc *sch = child->__parent;
681 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
682 struct cbq_class *cl = q->rx_class;
683
684 q->rx_class = NULL;
685
686 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
687
688 cbq_mark_toplevel(q, cl);
689
690 q->rx_class = cl;
691 cl->q->__parent = sch;
692
693 if (cl->q->enqueue(skb, cl->q) == 0) {
694 sch->q.qlen++;
695 sch->stats.packets++;
696 sch->stats.bytes+=len;
697 if (!cl->next_alive)
698 cbq_activate_class(cl);
699 return 0;
700 }
701 sch->stats.drops++;
702 return 0;
703 }
704
705 sch->stats.drops++;
706 return -1;
707 }
708 #endif
709
710 /*
711    This is a mission-critical procedure.
712
713    We "regenerate" the toplevel cutoff if the transmitting class
714    has backlog and is not regulated. This is not part of the
715    original CBQ description, but looks more reasonable.
716    Probably, it is wrong. This question needs further investigation.
717 */
718
719 static __inline__ void
720 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
721 struct cbq_class *borrowed)
722 {
723 if (cl && q->toplevel >= borrowed->level) {
724 if (cl->q->q.qlen > 1) {
725 do {
726 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
727 q->toplevel = borrowed->level;
728 return;
729 }
730 } while ((borrowed=borrowed->borrow) != NULL);
731 }
732 #if 0
733 /* It is not necessary now. Uncommenting it
734 will save CPU cycles, but decrease fairness.
735 */
736 q->toplevel = TC_CBQ_MAXLEVEL;
737 #endif
738 }
739 }
740
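/* Charge the just transmitted packet to its class and to all of its
   link-sharing ancestors: update avgidle and, for overlimit classes,
   the undertime at which they may send again. */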
741 static void
742 cbq_update(struct cbq_sched_data *q)
743 {
744 struct cbq_class *this = q->tx_class;
745 struct cbq_class *cl = this;
746 int len = q->tx_len;
747
748 q->tx_class = NULL;
749
750 for ( ; cl; cl = cl->share) {
751 long avgidle = cl->avgidle;
752 long idle;
753
754 cl->stats.packets++;
755 cl->stats.bytes += len;
756
757 /*
758 (now - last) is total time between packet right edges.
759 (last_pktlen/rate) is "virtual" busy time, so that
760
761 idle = (now - last) - last_pktlen/rate
762 */
763
764 idle = PSCHED_TDIFF(q->now, cl->last);
765 if ((unsigned long)idle > 128*1024*1024) {
766 avgidle = cl->maxidle;
767 } else {
768 idle -= L2T(cl, len);
769
770 /* true_avgidle := (1-W)*true_avgidle + W*idle,
771 where W=2^{-ewma_log}. But cl->avgidle is scaled:
772 cl->avgidle == true_avgidle/W,
773 hence:
774 */
775 avgidle += idle - (avgidle>>cl->ewma_log);
776 }
777
778 if (avgidle <= 0) {
779 /* Overlimit or at-limit */
780
781 if (avgidle < cl->minidle)
782 avgidle = cl->minidle;
783
784 cl->avgidle = avgidle;
785
786 /* Calculate expected time, when this class
787 will be allowed to send.
788 It will occur, when:
789 (1-W)*true_avgidle + W*delay = 0, i.e.
790 idle = (1/W - 1)*(-true_avgidle)
791 or
792 idle = (1 - W)*(-cl->avgidle);
793 */
794 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
795
796 /*
797 			   That is not all.
798 			   To maintain the rate allocated to the class,
799 			   we add to undertime the virtual clock time
800 			   necessary to complete the transmitted packet.
801 			   (len/phys_bandwidth has already passed
802 			   by the moment cbq_update is called.)
803 */
804
805 idle -= L2T(&q->link, len);
806 idle += L2T(cl, len);
807
808 PSCHED_AUDIT_TDIFF(idle);
809
810 PSCHED_TADD2(q->now, idle, cl->undertime);
811 } else {
812 /* Underlimit */
813
814 PSCHED_SET_PASTPERFECT(cl->undertime);
815 if (avgidle > cl->maxidle)
816 cl->avgidle = cl->maxidle;
817 else
818 cl->avgidle = avgidle;
819 }
820 cl->last = q->now;
821 }
822
823 cbq_update_toplevel(q, this, q->tx_borrowed);
824 }
825
826 static __inline__ struct cbq_class *
827 cbq_under_limit(struct cbq_class *cl)
828 {
829 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
830 struct cbq_class *this_cl = cl;
831
832 if (cl->tparent == NULL)
833 return cl;
834
835 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
836 !PSCHED_TLESS(q->now, cl->undertime)) {
837 cl->delayed = 0;
838 return cl;
839 }
840
841 do {
842 		/* This is a very suspicious place. Currently an overlimit
843 		   action is generated for non-bounded classes
844 		   only if the link is completely congested.
845 		   Though this agrees with the ancestor-only paradigm,
846 		   it looks very dubious. In particular,
847 		   it means that this chunk of code will either
848 		   never be called or will result in a strong amplification
849 		   of burstiness. Dangerous, silly, and, alas,
850 		   no other solution exists.
851 		 */
852 if ((cl = cl->borrow) == NULL) {
853 this_cl->stats.overlimits++;
854 this_cl->overlimit(this_cl);
855 return NULL;
856 }
857 if (cl->level > q->toplevel)
858 return NULL;
859 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
860 PSCHED_TLESS(q->now, cl->undertime));
861
862 cl->delayed = 0;
863 return cl;
864 }
865
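/* One weighted round-robin pass over the active classes of a single
   priority band: each class spends its deficit (refilled by `quantum')
   and may borrow from an underlimit ancestor via cbq_under_limit(). */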
866 static __inline__ struct sk_buff *
867 cbq_dequeue_prio(struct Qdisc *sch, int prio)
868 {
869 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
870 struct cbq_class *cl_tail, *cl_prev, *cl;
871 struct sk_buff *skb;
872 int deficit;
873
874 cl_tail = cl_prev = q->active[prio];
875 cl = cl_prev->next_alive;
876
877 do {
878 deficit = 0;
879
880 /* Start round */
881 do {
882 struct cbq_class *borrow = cl;
883
884 if (cl->q->q.qlen &&
885 (borrow = cbq_under_limit(cl)) == NULL)
886 goto skip_class;
887
888 if (cl->deficit <= 0) {
889 /* Class exhausted its allotment per
890 this round. Switch to the next one.
891 */
892 deficit = 1;
893 cl->deficit += cl->quantum;
894 goto next_class;
895 }
896
897 skb = cl->q->dequeue(cl->q);
898
899 			/* The class did not give us any skb :-(
900 			   This can occur even if cl->q->q.qlen != 0,
901 			   e.g. if cl->q is a "tbf" qdisc.
902 */
903 if (skb == NULL)
904 goto skip_class;
905
906 cl->deficit -= skb->len;
907 q->tx_class = cl;
908 q->tx_borrowed = borrow;
909 if (borrow != cl) {
910 #ifndef CBQ_XSTATS_BORROWS_BYTES
911 borrow->xstats.borrows++;
912 cl->xstats.borrows++;
913 #else
914 borrow->xstats.borrows += skb->len;
915 cl->xstats.borrows += skb->len;
916 #endif
917 }
918 q->tx_len = skb->len;
919
920 if (cl->deficit <= 0) {
921 q->active[prio] = cl;
922 cl = cl->next_alive;
923 cl->deficit += cl->quantum;
924 }
925 return skb;
926
927 skip_class:
928 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
929 /* Class is empty or penalized.
930 Unlink it from active chain.
931 */
932 cl_prev->next_alive = cl->next_alive;
933 cl->next_alive = NULL;
934
935 /* Did cl_tail point to it? */
936 if (cl == cl_tail) {
937 /* Repair it! */
938 cl_tail = cl_prev;
939
940 /* Was it the last class in this band? */
941 if (cl == cl_tail) {
942 /* Kill the band! */
943 q->active[prio] = NULL;
944 q->activemask &= ~(1<<prio);
945 if (cl->q->q.qlen)
946 cbq_activate_class(cl);
947 return NULL;
948 }
949
950 q->active[prio] = cl_tail;
951 }
952 if (cl->q->q.qlen)
953 cbq_activate_class(cl);
954
955 cl = cl_prev;
956 }
957
958 next_class:
959 cl_prev = cl;
960 cl = cl->next_alive;
961 } while (cl_prev != cl_tail);
962 } while (deficit);
963
964 q->active[prio] = cl_prev;
965
966 return NULL;
967 }
968
969 static __inline__ struct sk_buff *
970 cbq_dequeue_1(struct Qdisc *sch)
971 {
972 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
973 struct sk_buff *skb;
974 unsigned activemask;
975
976 activemask = q->activemask&0xFF;
977 while (activemask) {
978 int prio = ffz(~activemask);
979 activemask &= ~(1<<prio);
980 skb = cbq_dequeue_prio(sch, prio);
981 if (skb)
982 return skb;
983 }
984 return NULL;
985 }
986
987 static struct sk_buff *
988 cbq_dequeue(struct Qdisc *sch)
989 {
990 struct sk_buff *skb;
991 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
992 psched_time_t now;
993 psched_tdiff_t incr;
994
995 PSCHED_GET_TIME(now);
996 incr = PSCHED_TDIFF(now, q->now_rt);
997
998 if (q->tx_class) {
999 psched_tdiff_t incr2;
1000 /* Time integrator. We calculate EOS time
1001 		   by adding the expected packet transmission time.
1002 		   If real time is greater, we warp the artificial clock,
1003 so that:
1004
1005 cbq_time = max(real_time, work);
1006 */
1007 incr2 = L2T(&q->link, q->tx_len);
1008 PSCHED_TADD(q->now, incr2);
1009 cbq_update(q);
1010 if ((incr -= incr2) < 0)
1011 incr = 0;
1012 }
1013 PSCHED_TADD(q->now, incr);
1014 q->now_rt = now;
1015
1016 for (;;) {
1017 q->wd_expires = 0;
1018
1019 skb = cbq_dequeue_1(sch);
1020 if (skb) {
1021 sch->q.qlen--;
1022 sch->flags &= ~TCQ_F_THROTTLED;
1023 return skb;
1024 }
1025
1026 		/* All the classes are overlimit.
1027
1028 		   This is possible if:
1029
1030 		   1. The scheduler is empty.
1031 		   2. The toplevel cutoff inhibited borrowing.
1032 		   3. The root class is overlimit.
1033
1034 		   Reset the 2nd and 3rd conditions and retry.
1035
1036 		   Note that NS and cbq-2.0 are buggy: picking
1037 		   an arbitrary class is appropriate for ancestor-only
1038 		   sharing, but not for the toplevel algorithm.
1039
1040 		   Our version is better, but slower, because it requires
1041 		   two passes; this is unavoidable with top-level sharing.
1042 */
1043
1044 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1045 PSCHED_IS_PASTPERFECT(q->link.undertime))
1046 break;
1047
1048 q->toplevel = TC_CBQ_MAXLEVEL;
1049 PSCHED_SET_PASTPERFECT(q->link.undertime);
1050 }
1051
1052 	/* No packets in the scheduler, or nobody wants to give them to us :-(
1053 	   Sigh... start the watchdog timer in the latter case. */
1054
1055 if (sch->q.qlen) {
1056 sch->stats.overlimits++;
1057 if (q->wd_expires) {
1058 long delay = PSCHED_US2JIFFIE(q->wd_expires);
1059 if (delay <= 0)
1060 delay = 1;
1061 mod_timer(&q->wd_timer, jiffies + delay);
1062 sch->flags |= TCQ_F_THROTTLED;
1063 }
1064 }
1065 return NULL;
1066 }
1067
1068 /* CBQ class maintenance routines */
1069
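/* Recompute the level of `this' and of all its ancestors after its set of
   children has changed: leaf classes have level 0, an inner node has the
   maximal level of its children plus one. */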
1070 static void cbq_adjust_levels(struct cbq_class *this)
1071 {
1072 if (this == NULL)
1073 return;
1074
1075 do {
1076 int level = 0;
1077 struct cbq_class *cl;
1078
1079 if ((cl = this->children) != NULL) {
1080 do {
1081 if (cl->level > level)
1082 level = cl->level;
1083 } while ((cl = cl->sibling) != this->children);
1084 }
1085 this->level = level+1;
1086 } while ((this = this->tparent) != NULL);
1087 }
1088
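/* Recompute the WRR quantum of every class in the given priority band so
   that it stays proportional to the class weight and allot, normalized by
   the per-band totals q->quanta[] and q->nclasses[] maintained by
   cbq_addprio()/cbq_rmprio(). */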
1089 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1090 {
1091 struct cbq_class *cl;
1092 unsigned h;
1093
1094 if (q->quanta[prio] == 0)
1095 return;
1096
1097 for (h=0; h<16; h++) {
1098 for (cl = q->classes[h]; cl; cl = cl->next) {
1099 			/* BUGGGG... Beware! This expression suffers from
1100 			   arithmetic overflow!
1101 			 */
1102 if (cl->priority == prio) {
1103 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1104 q->quanta[prio];
1105 }
1106 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1107 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1108 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1109 }
1110 }
1111 }
1112 }
1113
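/* Rebuild the split node's defaults[] table after cl's defmap changed:
   drop stale entries pointing to cl, then fill each empty slot with the
   lowest-level class attached to this split whose defmap covers it. */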
1114 static void cbq_sync_defmap(struct cbq_class *cl)
1115 {
1116 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
1117 struct cbq_class *split = cl->split;
1118 unsigned h;
1119 int i;
1120
1121 if (split == NULL)
1122 return;
1123
1124 for (i=0; i<=TC_PRIO_MAX; i++) {
1125 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1126 split->defaults[i] = NULL;
1127 }
1128
1129 for (i=0; i<=TC_PRIO_MAX; i++) {
1130 int level = split->level;
1131
1132 if (split->defaults[i])
1133 continue;
1134
1135 for (h=0; h<16; h++) {
1136 struct cbq_class *c;
1137
1138 for (c = q->classes[h]; c; c = c->next) {
1139 if (c->split == split && c->level < level &&
1140 c->defmap&(1<<i)) {
1141 split->defaults[i] = c;
1142 level = c->level;
1143 }
1144 }
1145 }
1146 }
1147 }
1148
1149 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1150 {
1151 struct cbq_class *split = NULL;
1152
1153 if (splitid == 0) {
1154 if ((split = cl->split) == NULL)
1155 return;
1156 splitid = split->classid;
1157 }
1158
1159 if (split == NULL || split->classid != splitid) {
1160 for (split = cl->tparent; split; split = split->tparent)
1161 if (split->classid == splitid)
1162 break;
1163 }
1164
1165 if (split == NULL)
1166 return;
1167
1168 if (cl->split != split) {
1169 cl->defmap = 0;
1170 cbq_sync_defmap(cl);
1171 cl->split = split;
1172 cl->defmap = def&mask;
1173 } else
1174 cl->defmap = (cl->defmap&~mask)|(def&mask);
1175
1176 cbq_sync_defmap(cl);
1177 }
1178
1179 static void cbq_unlink_class(struct cbq_class *this)
1180 {
1181 struct cbq_class *cl, **clp;
1182 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1183
1184 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1185 if (cl == this) {
1186 *clp = cl->next;
1187 cl->next = NULL;
1188 break;
1189 }
1190 }
1191
1192 if (this->tparent) {
1193 clp=&this->sibling;
1194 cl = *clp;
1195 do {
1196 if (cl == this) {
1197 *clp = cl->sibling;
1198 break;
1199 }
1200 clp = &cl->sibling;
1201 } while ((cl = *clp) != this->sibling);
1202
1203 if (this->tparent->children == this) {
1204 this->tparent->children = this->sibling;
1205 if (this->sibling == this)
1206 this->tparent->children = NULL;
1207 }
1208 } else {
1209 BUG_TRAP(this->sibling == this);
1210 }
1211 }
1212
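/* Insert the class into the class hash table and into its parent's
   circular sibling list. */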
1213 static void cbq_link_class(struct cbq_class *this)
1214 {
1215 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1216 unsigned h = cbq_hash(this->classid);
1217 struct cbq_class *parent = this->tparent;
1218
1219 this->sibling = this;
1220 this->next = q->classes[h];
1221 q->classes[h] = this;
1222
1223 if (parent == NULL)
1224 return;
1225
1226 if (parent->children == NULL) {
1227 parent->children = this;
1228 } else {
1229 this->sibling = parent->children->sibling;
1230 parent->children->sibling = this;
1231 }
1232 }
1233
1234 static unsigned int cbq_drop(struct Qdisc* sch)
1235 {
1236 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1237 struct cbq_class *cl, *cl_head;
1238 int prio;
1239 unsigned int len;
1240
1241 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1242 if ((cl_head = q->active[prio]) == NULL)
1243 continue;
1244
1245 cl = cl_head;
1246 do {
1247 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1248 sch->q.qlen--;
1249 return len;
1250 }
1251 } while ((cl = cl->next_alive) != cl_head);
1252 }
1253 return 0;
1254 }
1255
1256 static void
1257 cbq_reset(struct Qdisc* sch)
1258 {
1259 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1260 struct cbq_class *cl;
1261 int prio;
1262 unsigned h;
1263
1264 q->activemask = 0;
1265 q->pmask = 0;
1266 q->tx_class = NULL;
1267 q->tx_borrowed = NULL;
1268 del_timer(&q->wd_timer);
1269 del_timer(&q->delay_timer);
1270 q->toplevel = TC_CBQ_MAXLEVEL;
1271 PSCHED_GET_TIME(q->now);
1272 q->now_rt = q->now;
1273
1274 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1275 q->active[prio] = NULL;
1276
1277 for (h = 0; h < 16; h++) {
1278 for (cl = q->classes[h]; cl; cl = cl->next) {
1279 qdisc_reset(cl->q);
1280
1281 cl->next_alive = NULL;
1282 PSCHED_SET_PASTPERFECT(cl->undertime);
1283 cl->avgidle = cl->maxidle;
1284 cl->deficit = cl->quantum;
1285 cl->cpriority = cl->priority;
1286 }
1287 }
1288 sch->q.qlen = 0;
1289 }
1290
1291
1292 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1293 {
1294 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1295 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1296 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1297 }
1298 if (lss->change&TCF_CBQ_LSS_EWMA)
1299 cl->ewma_log = lss->ewma_log;
1300 if (lss->change&TCF_CBQ_LSS_AVPKT)
1301 cl->avpkt = lss->avpkt;
1302 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1303 cl->minidle = -(long)lss->minidle;
1304 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1305 cl->maxidle = lss->maxidle;
1306 cl->avgidle = lss->maxidle;
1307 }
1308 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1309 cl->offtime = lss->offtime;
1310 return 0;
1311 }
1312
1313 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1314 {
1315 q->nclasses[cl->priority]--;
1316 q->quanta[cl->priority] -= cl->weight;
1317 cbq_normalize_quanta(q, cl->priority);
1318 }
1319
1320 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1321 {
1322 q->nclasses[cl->priority]++;
1323 q->quanta[cl->priority] += cl->weight;
1324 cbq_normalize_quanta(q, cl->priority);
1325 }
1326
1327 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1328 {
1329 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
1330
1331 if (wrr->allot)
1332 cl->allot = wrr->allot;
1333 if (wrr->weight)
1334 cl->weight = wrr->weight;
1335 if (wrr->priority) {
1336 cl->priority = wrr->priority-1;
1337 cl->cpriority = cl->priority;
1338 if (cl->priority >= cl->priority2)
1339 cl->priority2 = TC_CBQ_MAXPRIO-1;
1340 }
1341
1342 cbq_addprio(q, cl);
1343 return 0;
1344 }
1345
1346 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1347 {
1348 switch (ovl->strategy) {
1349 case TC_CBQ_OVL_CLASSIC:
1350 cl->overlimit = cbq_ovl_classic;
1351 break;
1352 case TC_CBQ_OVL_DELAY:
1353 cl->overlimit = cbq_ovl_delay;
1354 break;
1355 case TC_CBQ_OVL_LOWPRIO:
1356 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1357 ovl->priority2-1 <= cl->priority)
1358 return -EINVAL;
1359 cl->priority2 = ovl->priority2-1;
1360 cl->overlimit = cbq_ovl_lowprio;
1361 break;
1362 case TC_CBQ_OVL_DROP:
1363 cl->overlimit = cbq_ovl_drop;
1364 break;
1365 case TC_CBQ_OVL_RCLASSIC:
1366 cl->overlimit = cbq_ovl_rclassic;
1367 break;
1368 default:
1369 return -EINVAL;
1370 }
1371 cl->penalty = (ovl->penalty*HZ)/1000;
1372 return 0;
1373 }
1374
1375 #ifdef CONFIG_NET_CLS_POLICE
1376 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1377 {
1378 cl->police = p->police;
1379
1380 if (cl->q->handle) {
1381 if (p->police == TC_POLICE_RECLASSIFY)
1382 cl->q->reshape_fail = cbq_reshape_fail;
1383 else
1384 cl->q->reshape_fail = NULL;
1385 }
1386 return 0;
1387 }
1388 #endif
1389
1390 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1391 {
1392 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1393 return 0;
1394 }
1395
1396 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1397 {
1398 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1399 struct rtattr *tb[TCA_CBQ_MAX];
1400 struct tc_ratespec *r;
1401
1402 if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
1403 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1404 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1405 return -EINVAL;
1406
1407 if (tb[TCA_CBQ_LSSOPT-1] &&
1408 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1409 return -EINVAL;
1410
1411 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1412
1413 MOD_INC_USE_COUNT;
1414 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL) {
1415 MOD_DEC_USE_COUNT;
1416 return -EINVAL;
1417 }
1418
1419 q->link.refcnt = 1;
1420 q->link.sibling = &q->link;
1421 q->link.classid = sch->handle;
1422 q->link.qdisc = sch;
1423 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1424 q->link.q = &noop_qdisc;
1425
1426 q->link.priority = TC_CBQ_MAXPRIO-1;
1427 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1428 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1429 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1430 q->link.overlimit = cbq_ovl_classic;
1431 q->link.allot = psched_mtu(sch->dev);
1432 q->link.quantum = q->link.allot;
1433 q->link.weight = q->link.R_tab->rate.rate;
1434
1435 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1436 q->link.avpkt = q->link.allot/2;
1437 q->link.minidle = -0x7FFFFFFF;
1438 q->link.stats.lock = &sch->dev->queue_lock;
1439
1440 init_timer(&q->wd_timer);
1441 q->wd_timer.data = (unsigned long)sch;
1442 q->wd_timer.function = cbq_watchdog;
1443 init_timer(&q->delay_timer);
1444 q->delay_timer.data = (unsigned long)sch;
1445 q->delay_timer.function = cbq_undelay;
1446 q->toplevel = TC_CBQ_MAXLEVEL;
1447 PSCHED_GET_TIME(q->now);
1448 q->now_rt = q->now;
1449
1450 cbq_link_class(&q->link);
1451
1452 if (tb[TCA_CBQ_LSSOPT-1])
1453 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1454
1455 cbq_addprio(q, &q->link);
1456 return 0;
1457 }
1458
1459 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1460 {
1461 unsigned char *b = skb->tail;
1462
1463 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1464 return skb->len;
1465
1466 rtattr_failure:
1467 skb_trim(skb, b - skb->data);
1468 return -1;
1469 }
1470
1471 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1472 {
1473 unsigned char *b = skb->tail;
1474 struct tc_cbq_lssopt opt;
1475
1476 opt.flags = 0;
1477 if (cl->borrow == NULL)
1478 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1479 if (cl->share == NULL)
1480 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1481 opt.ewma_log = cl->ewma_log;
1482 opt.level = cl->level;
1483 opt.avpkt = cl->avpkt;
1484 opt.maxidle = cl->maxidle;
1485 opt.minidle = (u32)(-cl->minidle);
1486 opt.offtime = cl->offtime;
1487 opt.change = ~0;
1488 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1489 return skb->len;
1490
1491 rtattr_failure:
1492 skb_trim(skb, b - skb->data);
1493 return -1;
1494 }
1495
1496 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1497 {
1498 unsigned char *b = skb->tail;
1499 struct tc_cbq_wrropt opt;
1500
1501 opt.flags = 0;
1502 opt.allot = cl->allot;
1503 opt.priority = cl->priority+1;
1504 opt.cpriority = cl->cpriority+1;
1505 opt.weight = cl->weight;
1506 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1507 return skb->len;
1508
1509 rtattr_failure:
1510 skb_trim(skb, b - skb->data);
1511 return -1;
1512 }
1513
1514 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1515 {
1516 unsigned char *b = skb->tail;
1517 struct tc_cbq_ovl opt;
1518
1519 opt.strategy = cl->ovl_strategy;
1520 opt.priority2 = cl->priority2+1;
1521 opt.pad = 0;
1522 opt.penalty = (cl->penalty*1000)/HZ;
1523 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1524 return skb->len;
1525
1526 rtattr_failure:
1527 skb_trim(skb, b - skb->data);
1528 return -1;
1529 }
1530
1531 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1532 {
1533 unsigned char *b = skb->tail;
1534 struct tc_cbq_fopt opt;
1535
1536 if (cl->split || cl->defmap) {
1537 opt.split = cl->split ? cl->split->classid : 0;
1538 opt.defmap = cl->defmap;
1539 opt.defchange = ~0;
1540 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1541 }
1542 return skb->len;
1543
1544 rtattr_failure:
1545 skb_trim(skb, b - skb->data);
1546 return -1;
1547 }
1548
1549 #ifdef CONFIG_NET_CLS_POLICE
1550 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1551 {
1552 unsigned char *b = skb->tail;
1553 struct tc_cbq_police opt;
1554
1555 if (cl->police) {
1556 opt.police = cl->police;
1557 opt.__res1 = 0;
1558 opt.__res2 = 0;
1559 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1560 }
1561 return skb->len;
1562
1563 rtattr_failure:
1564 skb_trim(skb, b - skb->data);
1565 return -1;
1566 }
1567 #endif
1568
1569 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1570 {
1571 if (cbq_dump_lss(skb, cl) < 0 ||
1572 cbq_dump_rate(skb, cl) < 0 ||
1573 cbq_dump_wrr(skb, cl) < 0 ||
1574 cbq_dump_ovl(skb, cl) < 0 ||
1575 #ifdef CONFIG_NET_CLS_POLICE
1576 cbq_dump_police(skb, cl) < 0 ||
1577 #endif
1578 cbq_dump_fopt(skb, cl) < 0)
1579 return -1;
1580 return 0;
1581 }
1582
1583 int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
1584 {
1585 RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
1586 return 0;
1587
1588 rtattr_failure:
1589 return -1;
1590 }
1591
1592
1593 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1594 {
1595 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1596 unsigned char *b = skb->tail;
1597 struct rtattr *rta;
1598
1599 rta = (struct rtattr*)b;
1600 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1601 if (cbq_dump_attr(skb, &q->link) < 0)
1602 goto rtattr_failure;
1603 rta->rta_len = skb->tail - b;
1604 spin_lock_bh(&sch->dev->queue_lock);
1605 q->link.xstats.avgidle = q->link.avgidle;
1606 if (cbq_copy_xstats(skb, &q->link.xstats)) {
1607 spin_unlock_bh(&sch->dev->queue_lock);
1608 goto rtattr_failure;
1609 }
1610 spin_unlock_bh(&sch->dev->queue_lock);
1611 return skb->len;
1612
1613 rtattr_failure:
1614 skb_trim(skb, b - skb->data);
1615 return -1;
1616 }
1617
1618 static int
1619 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1620 struct sk_buff *skb, struct tcmsg *tcm)
1621 {
1622 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1623 struct cbq_class *cl = (struct cbq_class*)arg;
1624 unsigned char *b = skb->tail;
1625 struct rtattr *rta;
1626
1627 if (cl->tparent)
1628 tcm->tcm_parent = cl->tparent->classid;
1629 else
1630 tcm->tcm_parent = TC_H_ROOT;
1631 tcm->tcm_handle = cl->classid;
1632 tcm->tcm_info = cl->q->handle;
1633
1634 rta = (struct rtattr*)b;
1635 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1636 if (cbq_dump_attr(skb, cl) < 0)
1637 goto rtattr_failure;
1638 rta->rta_len = skb->tail - b;
1639 cl->stats.qlen = cl->q->q.qlen;
1640 if (qdisc_copy_stats(skb, &cl->stats))
1641 goto rtattr_failure;
1642 spin_lock_bh(&sch->dev->queue_lock);
1643 cl->xstats.avgidle = cl->avgidle;
1644 cl->xstats.undertime = 0;
1645 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1646 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1647 if (cbq_copy_xstats(skb, &cl->xstats)) {
1648 spin_unlock_bh(&sch->dev->queue_lock);
1649 goto rtattr_failure;
1650 }
1651 spin_unlock_bh(&sch->dev->queue_lock);
1652
1653 return skb->len;
1654
1655 rtattr_failure:
1656 skb_trim(skb, b - skb->data);
1657 return -1;
1658 }
1659
1660 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1661 struct Qdisc **old)
1662 {
1663 struct cbq_class *cl = (struct cbq_class*)arg;
1664
1665 if (cl) {
1666 if (new == NULL) {
1667 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1668 return -ENOBUFS;
1669 } else {
1670 #ifdef CONFIG_NET_CLS_POLICE
1671 if (cl->police == TC_POLICE_RECLASSIFY)
1672 new->reshape_fail = cbq_reshape_fail;
1673 #endif
1674 }
1675 sch_tree_lock(sch);
1676 *old = cl->q;
1677 cl->q = new;
1678 sch->q.qlen -= (*old)->q.qlen;
1679 qdisc_reset(*old);
1680 sch_tree_unlock(sch);
1681
1682 return 0;
1683 }
1684 return -ENOENT;
1685 }
1686
1687 static struct Qdisc *
1688 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1689 {
1690 struct cbq_class *cl = (struct cbq_class*)arg;
1691
1692 return cl ? cl->q : NULL;
1693 }
1694
1695 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1696 {
1697 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1698 struct cbq_class *cl = cbq_class_lookup(q, classid);
1699
1700 if (cl) {
1701 cl->refcnt++;
1702 return (unsigned long)cl;
1703 }
1704 return 0;
1705 }
1706
1707 static void cbq_destroy_filters(struct cbq_class *cl)
1708 {
1709 struct tcf_proto *tp;
1710
1711 while ((tp = cl->filter_list) != NULL) {
1712 cl->filter_list = tp->next;
1713 tcf_destroy(tp);
1714 }
1715 }
1716
1717 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1718 {
1719 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1720
1721 BUG_TRAP(!cl->filters);
1722
1723 cbq_destroy_filters(cl);
1724 qdisc_destroy(cl->q);
1725 qdisc_put_rtab(cl->R_tab);
1726 #ifdef CONFIG_NET_ESTIMATOR
1727 qdisc_kill_estimator(&cl->stats);
1728 #endif
1729 if (cl != &q->link)
1730 kfree(cl);
1731 }
1732
1733 static void
1734 cbq_destroy(struct Qdisc* sch)
1735 {
1736 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1737 struct cbq_class *cl;
1738 unsigned h;
1739
1740 #ifdef CONFIG_NET_CLS_POLICE
1741 q->rx_class = NULL;
1742 #endif
1743 /*
1744 * Filters must be destroyed first because we don't destroy the
1745 	 * classes from root to leaves, which means that filters can still
1746 * be bound to classes which have been destroyed already. --TGR '04
1747 */
1748 for (h = 0; h < 16; h++)
1749 for (cl = q->classes[h]; cl; cl = cl->next)
1750 cbq_destroy_filters(cl);
1751
1752 for (h = 0; h < 16; h++) {
1753 struct cbq_class *next;
1754
1755 for (cl = q->classes[h]; cl; cl = next) {
1756 next = cl->next;
1757 cbq_destroy_class(sch, cl);
1758 }
1759 }
1760
1761 MOD_DEC_USE_COUNT;
1762 }
1763
1764 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1765 {
1766 struct cbq_class *cl = (struct cbq_class*)arg;
1767
1768 if (--cl->refcnt == 0) {
1769 #ifdef CONFIG_NET_CLS_POLICE
1770 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1771
1772 spin_lock_bh(&sch->dev->queue_lock);
1773 if (q->rx_class == cl)
1774 q->rx_class = NULL;
1775 spin_unlock_bh(&sch->dev->queue_lock);
1776 #endif
1777
1778 cbq_destroy_class(sch, cl);
1779 }
1780 }
1781
1782 static int
1783 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1784 unsigned long *arg)
1785 {
1786 int err;
1787 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1788 struct cbq_class *cl = (struct cbq_class*)*arg;
1789 struct rtattr *opt = tca[TCA_OPTIONS-1];
1790 struct rtattr *tb[TCA_CBQ_MAX];
1791 struct cbq_class *parent;
1792 struct qdisc_rate_table *rtab = NULL;
1793
1794 if (opt==NULL ||
1795 rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1796 return -EINVAL;
1797
1798 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1799 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1800 return -EINVAL;
1801
1802 if (tb[TCA_CBQ_FOPT-1] &&
1803 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1804 return -EINVAL;
1805
1806 if (tb[TCA_CBQ_RATE-1] &&
1807 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1808 return -EINVAL;
1809
1810 if (tb[TCA_CBQ_LSSOPT-1] &&
1811 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1812 return -EINVAL;
1813
1814 if (tb[TCA_CBQ_WRROPT-1] &&
1815 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1816 return -EINVAL;
1817
1818 #ifdef CONFIG_NET_CLS_POLICE
1819 if (tb[TCA_CBQ_POLICE-1] &&
1820 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1821 return -EINVAL;
1822 #endif
1823
1824 if (cl) {
1825 /* Check parent */
1826 if (parentid) {
1827 if (cl->tparent && cl->tparent->classid != parentid)
1828 return -EINVAL;
1829 if (!cl->tparent && parentid != TC_H_ROOT)
1830 return -EINVAL;
1831 }
1832
1833 if (tb[TCA_CBQ_RATE-1]) {
1834 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1835 if (rtab == NULL)
1836 return -EINVAL;
1837 }
1838
1839 /* Change class parameters */
1840 sch_tree_lock(sch);
1841
1842 if (cl->next_alive != NULL)
1843 cbq_deactivate_class(cl);
1844
1845 if (rtab) {
1846 rtab = xchg(&cl->R_tab, rtab);
1847 qdisc_put_rtab(rtab);
1848 }
1849
1850 if (tb[TCA_CBQ_LSSOPT-1])
1851 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1852
1853 if (tb[TCA_CBQ_WRROPT-1]) {
1854 cbq_rmprio(q, cl);
1855 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1856 }
1857
1858 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1859 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1860
1861 #ifdef CONFIG_NET_CLS_POLICE
1862 if (tb[TCA_CBQ_POLICE-1])
1863 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1864 #endif
1865
1866 if (tb[TCA_CBQ_FOPT-1])
1867 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1868
1869 if (cl->q->q.qlen)
1870 cbq_activate_class(cl);
1871
1872 sch_tree_unlock(sch);
1873
1874 #ifdef CONFIG_NET_ESTIMATOR
1875 if (tca[TCA_RATE-1]) {
1876 qdisc_kill_estimator(&cl->stats);
1877 qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1878 }
1879 #endif
1880 return 0;
1881 }
1882
1883 if (parentid == TC_H_ROOT)
1884 return -EINVAL;
1885
1886 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1887 tb[TCA_CBQ_LSSOPT-1] == NULL)
1888 return -EINVAL;
1889
1890 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1891 if (rtab == NULL)
1892 return -EINVAL;
1893
1894 if (classid) {
1895 err = -EINVAL;
1896 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1897 goto failure;
1898 } else {
1899 int i;
1900 classid = TC_H_MAKE(sch->handle,0x8000);
1901
1902 for (i=0; i<0x8000; i++) {
1903 if (++q->hgenerator >= 0x8000)
1904 q->hgenerator = 1;
1905 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1906 break;
1907 }
1908 err = -ENOSR;
1909 if (i >= 0x8000)
1910 goto failure;
1911 classid = classid|q->hgenerator;
1912 }
1913
1914 parent = &q->link;
1915 if (parentid) {
1916 parent = cbq_class_lookup(q, parentid);
1917 err = -EINVAL;
1918 if (parent == NULL)
1919 goto failure;
1920 }
1921
1922 err = -ENOBUFS;
1923 cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1924 if (cl == NULL)
1925 goto failure;
1926 memset(cl, 0, sizeof(*cl));
1927 cl->R_tab = rtab;
1928 rtab = NULL;
1929 cl->refcnt = 1;
1930 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1931 cl->q = &noop_qdisc;
1932 cl->classid = classid;
1933 cl->tparent = parent;
1934 cl->qdisc = sch;
1935 cl->allot = parent->allot;
1936 cl->quantum = cl->allot;
1937 cl->weight = cl->R_tab->rate.rate;
1938 cl->stats.lock = &sch->dev->queue_lock;
1939
1940 sch_tree_lock(sch);
1941 cbq_link_class(cl);
1942 cl->borrow = cl->tparent;
1943 if (cl->tparent != &q->link)
1944 cl->share = cl->tparent;
1945 cbq_adjust_levels(parent);
1946 cl->minidle = -0x7FFFFFFF;
1947 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1948 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1949 if (cl->ewma_log==0)
1950 cl->ewma_log = q->link.ewma_log;
1951 if (cl->maxidle==0)
1952 cl->maxidle = q->link.maxidle;
1953 if (cl->avpkt==0)
1954 cl->avpkt = q->link.avpkt;
1955 cl->overlimit = cbq_ovl_classic;
1956 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1957 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1958 #ifdef CONFIG_NET_CLS_POLICE
1959 if (tb[TCA_CBQ_POLICE-1])
1960 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1961 #endif
1962 if (tb[TCA_CBQ_FOPT-1])
1963 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1964 sch_tree_unlock(sch);
1965
1966 #ifdef CONFIG_NET_ESTIMATOR
1967 if (tca[TCA_RATE-1])
1968 qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1969 #endif
1970
1971 *arg = (unsigned long)cl;
1972 return 0;
1973
1974 failure:
1975 qdisc_put_rtab(rtab);
1976 return err;
1977 }
1978
1979 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1980 {
1981 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1982 struct cbq_class *cl = (struct cbq_class*)arg;
1983
1984 if (cl->filters || cl->children || cl == &q->link)
1985 return -EBUSY;
1986
1987 sch_tree_lock(sch);
1988
1989 if (cl->next_alive)
1990 cbq_deactivate_class(cl);
1991
1992 if (q->tx_borrowed == cl)
1993 q->tx_borrowed = q->tx_class;
1994 if (q->tx_class == cl) {
1995 q->tx_class = NULL;
1996 q->tx_borrowed = NULL;
1997 }
1998 #ifdef CONFIG_NET_CLS_POLICE
1999 if (q->rx_class == cl)
2000 q->rx_class = NULL;
2001 #endif
2002
2003 cbq_unlink_class(cl);
2004 cbq_adjust_levels(cl->tparent);
2005 cl->defmap = 0;
2006 cbq_sync_defmap(cl);
2007
2008 cbq_rmprio(q, cl);
2009 sch_tree_unlock(sch);
2010
2011 if (--cl->refcnt == 0)
2012 cbq_destroy_class(sch, cl);
2013
2014 return 0;
2015 }
2016
2017 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2018 {
2019 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2020 struct cbq_class *cl = (struct cbq_class *)arg;
2021
2022 if (cl == NULL)
2023 cl = &q->link;
2024
2025 return &cl->filter_list;
2026 }
2027
2028 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2029 u32 classid)
2030 {
2031 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2032 struct cbq_class *p = (struct cbq_class*)parent;
2033 struct cbq_class *cl = cbq_class_lookup(q, classid);
2034
2035 if (cl) {
2036 if (p && p->level <= cl->level)
2037 return 0;
2038 cl->filters++;
2039 return (unsigned long)cl;
2040 }
2041 return 0;
2042 }
2043
2044 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2045 {
2046 struct cbq_class *cl = (struct cbq_class*)arg;
2047
2048 cl->filters--;
2049 }
2050
2051 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2052 {
2053 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2054 unsigned h;
2055
2056 if (arg->stop)
2057 return;
2058
2059 for (h = 0; h < 16; h++) {
2060 struct cbq_class *cl;
2061
2062 for (cl = q->classes[h]; cl; cl = cl->next) {
2063 if (arg->count < arg->skip) {
2064 arg->count++;
2065 continue;
2066 }
2067 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2068 arg->stop = 1;
2069 return;
2070 }
2071 arg->count++;
2072 }
2073 }
2074 }
2075
2076 static struct Qdisc_class_ops cbq_class_ops =
2077 {
2078 cbq_graft,
2079 cbq_leaf,
2080 cbq_get,
2081 cbq_put,
2082 cbq_change_class,
2083 cbq_delete,
2084 cbq_walk,
2085
2086 cbq_find_tcf,
2087 cbq_bind_filter,
2088 cbq_unbind_filter,
2089
2090 cbq_dump_class,
2091 };
2092
2093 struct Qdisc_ops cbq_qdisc_ops =
2094 {
2095 NULL,
2096 &cbq_class_ops,
2097 "cbq",
2098 sizeof(struct cbq_sched_data),
2099
2100 cbq_enqueue,
2101 cbq_dequeue,
2102 cbq_requeue,
2103 cbq_drop,
2104
2105 cbq_init,
2106 cbq_reset,
2107 cbq_destroy,
2108 NULL /* cbq_change */,
2109
2110 cbq_dump,
2111 };
2112
2113 #ifdef MODULE
2114 int init_module(void)
2115 {
2116 return register_qdisc(&cbq_qdisc_ops);
2117 }
2118
2119 void cleanup_module(void)
2120 {
2121 unregister_qdisc(&cbq_qdisc_ops);
2122 }
2123 #endif
2124 MODULE_LICENSE("GPL");
2125