1 /*
2  * net/sched/sch_cbq.c	Class-Based Queueing discipline.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12 
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <asm/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
33 #include <net/ip.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
36 #include <net/sock.h>
37 #include <net/pkt_sched.h>
38 
39 
40 /*	Class-Based Queueing (CBQ) algorithm.
41 	=======================================
42 
43 	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44 	         Management Models for Packet Networks",
45 		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
46 
47 	         [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
48 
49 	         [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
50 		 Parameters", 1996
51 
52 		 [4] Sally Floyd and Michael Speer, "Experimental Results
53 		 for Class-Based Queueing", 1998, not published.
54 
55 	-----------------------------------------------------------------------
56 
57 	Algorithm skeleton was taken from NS simulator cbq.cc.
58 	If someone wants to check this code against the LBL version,
59 	they should take into account that ONLY the skeleton was borrowed;
60 	the implementation is different. Particularly:
61 
62 	--- The WRR algorithm is different. Our version looks more
63         reasonable (I hope) and works when quanta are allowed to be
64         less than MTU, which is always the case when real-time classes
65         have small rates. Note that the statement of [3] is
66         incomplete: delay may actually be estimated even if class
67         per-round allotment is less than MTU. Namely, if per-round
68         allotment is W*r_i, and r_1+...+r_k = r < 1
69 
70 	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
71 
72 	In the worst case we have IntServ estimate with D = W*r+k*MTU
73 	and C = MTU*r. The proof (if correct at all) is trivial.
74 
75 
76 	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
77 	interpret some places, which look like wrong translations
78 	from NS. Anyone is advised to find these differences
79 	and explain to me why I am wrong 8).
80 
81 	--- Linux has no EOI event, so we cannot estimate true class
82 	idle time. The workaround is to treat the next dequeue event
83 	as a sign that the previous packet has finished. This is wrong
84 	because of internal device queueing, but on a permanently loaded
85 	link it is true. Moreover, combined with the clock integrator,
86 	this scheme looks very close to an ideal solution.  */
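/*	Purely illustrative instance of the delay bound above (numbers
	chosen arbitrarily, reading [x] as a ceiling): with B = 10 Mbit/s,
	MTU = 1500 bytes, k = 4 classes each having r_i = 0.1 (so r = 0.4)
	and W chosen so that the per-round allotment W*r_i = 500 bytes
	(i.e. smaller than MTU), we get

	delay_i <= (ceil(1500/500)*2000 + 2000 + 4*1500)/B
		 = 14000 bytes / 10 Mbit/s ~= 11.2 ms

	so a delay estimate exists even though the per-round allotment
	is below the MTU.  */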
87 
88 struct cbq_sched_data;
89 
90 
91 struct cbq_class
92 {
93 	struct cbq_class	*next;		/* hash table link */
94 	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */
95 
96 /* Parameters */
97 	u32			classid;
98 	unsigned char		priority;	/* class priority */
99 	unsigned char		priority2;	/* priority to be used after overlimit */
100 	unsigned char		ewma_log;	/* time constant for idle time calculation */
101 	unsigned char		ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103 	unsigned char		police;
104 #endif
105 
106 	u32			defmap;
107 
108 	/* Link-sharing scheduler parameters */
109 	long			maxidle;	/* Class parameters: see below. */
110 	long			offtime;
111 	long			minidle;
112 	u32			avpkt;
113 	struct qdisc_rate_table	*R_tab;
114 
115 	/* Overlimit strategy parameters */
116 	void			(*overlimit)(struct cbq_class *cl);
117 	long			penalty;
118 
119 	/* General scheduler (WRR) parameters */
120 	long			allot;
121 	long			quantum;	/* Allotment per WRR round */
122 	long			weight;		/* Relative allotment: see below */
123 
124 	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
125 	struct cbq_class	*split;		/* Ptr to split node */
126 	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
127 	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
128 	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
129 						   parent otherwise */
130 	struct cbq_class	*sibling;	/* Sibling chain */
131 	struct cbq_class	*children;	/* Pointer to children chain */
132 
133 	struct Qdisc		*q;		/* Elementary queueing discipline */
134 
135 
136 /* Variables */
137 	unsigned char		cpriority;	/* Effective priority */
138 	unsigned char		delayed;
139 	unsigned char		level;		/* level of the class in hierarchy:
140 						   0 for leaf classes, and maximal
141 						   level of children + 1 for nodes.
142 						 */
143 
144 	psched_time_t		last;		/* Last end of service */
145 	psched_time_t		undertime;
146 	long			avgidle;
147 	long			deficit;	/* Saved deficit for WRR */
148 	unsigned long		penalized;
149 	struct tc_stats		stats;
150 	struct tc_cbq_xstats	xstats;
151 
152 	struct tcf_proto	*filter_list;
153 
154 	int			refcnt;
155 	int			filters;
156 
157 	struct cbq_class 	*defaults[TC_PRIO_MAX+1];
158 };
159 
160 struct cbq_sched_data
161 {
162 	struct cbq_class	*classes[16];		/* Hash table of all classes */
163 	int			nclasses[TC_CBQ_MAXPRIO+1];
164 	unsigned		quanta[TC_CBQ_MAXPRIO+1];
165 
166 	struct cbq_class	link;
167 
168 	unsigned		activemask;
169 	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
170 								   with backlog */
171 
172 #ifdef CONFIG_NET_CLS_POLICE
173 	struct cbq_class	*rx_class;
174 #endif
175 	struct cbq_class	*tx_class;
176 	struct cbq_class	*tx_borrowed;
177 	int			tx_len;
178 	psched_time_t		now;		/* Cached timestamp */
179 	psched_time_t		now_rt;		/* Cached real time */
180 	unsigned		pmask;
181 
182 	struct timer_list	delay_timer;
183 	struct timer_list	wd_timer;	/* Watchdog timer,
184 						   started when CBQ has
185 						   backlog, but cannot
186 						   transmit just now */
187 	long			wd_expires;
188 	int			toplevel;
189 	u32			hgenerator;
190 };
191 
192 
193 #define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
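/*	L2T ("length to time") converts a packet length into transmission
	time at the class rate using the rate table supplied by userspace:
	data[] holds the time per size bucket and cell_log is the log2 of
	the bucket width. Illustrative values only: with cell_log == 3 a
	1000-byte packet is charged data[1000 >> 3] == data[125].  */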
194 
195 
196 static __inline__ unsigned cbq_hash(u32 h)
197 {
198 	h ^= h>>8;
199 	h ^= h>>4;
200 	return h&0xF;
201 }
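/* The hash folds the low 16 bits of the classid into one of the 16
   buckets; for example classid 1:1 (0x00010001) lands in bucket 1 and
   1:f (0x0001000f) in bucket 15.  */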
202 
203 static __inline__ struct cbq_class *
204 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
205 {
206 	struct cbq_class *cl;
207 
208 	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
209 		if (cl->classid == classid)
210 			return cl;
211 	return NULL;
212 }
213 
214 #ifdef CONFIG_NET_CLS_POLICE
215 
216 static struct cbq_class *
217 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
218 {
219 	struct cbq_class *cl, *new;
220 
221 	for (cl = this->tparent; cl; cl = cl->tparent)
222 		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
223 			return new;
224 
225 	return NULL;
226 }
227 
228 #endif
229 
230 /* Classify packet. The procedure is pretty complicated, but
231    it allows us to combine link sharing and priority scheduling
232    transparently.
233 
234    Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
235    so that they resolve to split nodes. Then packets are classified
236    by logical priority, or a more specific classifier may be attached
237    to the split node.
238  */
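/* Example of the split/defmap mechanism (assuming a typical setup):
   a filter attached to a split node may return a result whose major
   handle is zero; the minor part is then treated as a TC_PRIO_* value
   and looked up in the split node's defaults[] table, which is filled
   in from the "split"/"defmap" parameters of the child classes (see
   cbq_change_defmap() below).  */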
239 
240 static struct cbq_class *
241 cbq_classify(struct sk_buff *skb, struct Qdisc *sch)
242 {
243 	struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
244 	struct cbq_class *head = &q->link;
245 	struct cbq_class **defmap;
246 	struct cbq_class *cl = NULL;
247 	u32 prio = skb->priority;
248 	struct tcf_result res;
249 
250 	/*
251 	 *  Step 1. If skb->priority points to one of our classes, use it.
252 	 */
253 	if (TC_H_MAJ(prio^sch->handle) == 0 &&
254 	    (cl = cbq_class_lookup(q, prio)) != NULL)
255 			return cl;
256 
257 	for (;;) {
258 		int result = 0;
259 
260 		defmap = head->defaults;
261 
262 		/*
263 		 * Step 2+n. Apply classifier.
264 		 */
265 		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
266 			goto fallback;
267 
268 		if ((cl = (void*)res.class) == NULL) {
269 			if (TC_H_MAJ(res.classid))
270 				cl = cbq_class_lookup(q, res.classid);
271 			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
272 				cl = defmap[TC_PRIO_BESTEFFORT];
273 
274 			if (cl == NULL || cl->level >= head->level)
275 				goto fallback;
276 		}
277 
278 #ifdef CONFIG_NET_CLS_POLICE
279 		switch (result) {
280 		case TC_POLICE_RECLASSIFY:
281 			return cbq_reclassify(skb, cl);
282 		case TC_POLICE_SHOT:
283 			return NULL;
284 		default:
285 			break;
286 		}
287 #endif
288 		if (cl->level == 0)
289 			return cl;
290 
291 		/*
292 		 * Step 3+n. If the classifier selected a link-sharing class,
293 		 *	   apply the agency-specific classifier.
294 		 *	   Repeat this procedure until we hit a leaf node.
295 		 */
296 		head = cl;
297 	}
298 
299 fallback:
300 	cl = head;
301 
302 	/*
303 	 * Step 4. No success...
304 	 */
305 	if (TC_H_MAJ(prio) == 0 &&
306 	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
307 	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
308 		return head;
309 
310 	return cl;
311 }
312 
313 /*
314    A packet has just been enqueued on an empty class.
315    cbq_activate_class adds the class to the tail of the active class
316    list of its priority band.
317  */
318 
319 static __inline__ void cbq_activate_class(struct cbq_class *cl)
320 {
321 	struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
322 	int prio = cl->cpriority;
323 	struct cbq_class *cl_tail;
324 
325 	cl_tail = q->active[prio];
326 	q->active[prio] = cl;
327 
328 	if (cl_tail != NULL) {
329 		cl->next_alive = cl_tail->next_alive;
330 		cl_tail->next_alive = cl;
331 	} else {
332 		cl->next_alive = cl;
333 		q->activemask |= (1<<prio);
334 	}
335 }
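/* The active list of a priority band is a circular, singly linked list
   threaded through next_alive: q->active[prio] points at the most
   recently activated class (the "tail"), and q->active[prio]->next_alive
   is the head where the WRR round in cbq_dequeue_prio() starts.  */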
336 
337 /*
338    Unlink a class from the active chain.
339    Note that the same unlinking is also done directly in cbq_dequeue*
340    during the round-robin procedure.
341  */
342 
343 static void cbq_deactivate_class(struct cbq_class *this)
344 {
345 	struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
346 	int prio = this->cpriority;
347 	struct cbq_class *cl;
348 	struct cbq_class *cl_prev = q->active[prio];
349 
350 	do {
351 		cl = cl_prev->next_alive;
352 		if (cl == this) {
353 			cl_prev->next_alive = cl->next_alive;
354 			cl->next_alive = NULL;
355 
356 			if (cl == q->active[prio]) {
357 				q->active[prio] = cl_prev;
358 				if (cl == q->active[prio]) {
359 					q->active[prio] = NULL;
360 					q->activemask &= ~(1<<prio);
361 					return;
362 				}
363 			}
364 
365 			cl = cl_prev->next_alive;
366 			return;
367 		}
368 	} while ((cl_prev = cl) != q->active[prio]);
369 }
370 
371 static void
372 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
373 {
374 	int toplevel = q->toplevel;
375 
376 	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
377 		psched_time_t now;
378 		psched_tdiff_t incr;
379 
380 		PSCHED_GET_TIME(now);
381 		incr = PSCHED_TDIFF(now, q->now_rt);
382 		PSCHED_TADD2(q->now, incr, now);
383 
384 		do {
385 			if (PSCHED_TLESS(cl->undertime, now)) {
386 				q->toplevel = cl->level;
387 				return;
388 			}
389 		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
390 	}
391 }
392 
393 static int
394 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
395 {
396 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
397 	struct cbq_class *cl = cbq_classify(skb, sch);
398 	int len = skb->len;
399 	int ret = NET_XMIT_POLICED;
400 
401 #ifdef CONFIG_NET_CLS_POLICE
402 	q->rx_class = cl;
403 #endif
404 	if (cl) {
405 #ifdef CONFIG_NET_CLS_POLICE
406 		cl->q->__parent = sch;
407 #endif
408 		if ((ret = cl->q->enqueue(skb, cl->q)) == 0) {
409 			sch->q.qlen++;
410 			sch->stats.packets++;
411 			sch->stats.bytes+=len;
412 			cbq_mark_toplevel(q, cl);
413 			if (!cl->next_alive)
414 				cbq_activate_class(cl);
415 			return 0;
416 		}
417 	}
418 
419 	sch->stats.drops++;
420 	if (cl == NULL)
421 		kfree_skb(skb);
422 	else {
423 		cbq_mark_toplevel(q, cl);
424 		cl->stats.drops++;
425 	}
426 	return ret;
427 }
428 
429 static int
430 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
431 {
432 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
433 	struct cbq_class *cl;
434 	int ret;
435 
436 	if ((cl = q->tx_class) == NULL) {
437 		kfree_skb(skb);
438 		sch->stats.drops++;
439 		return NET_XMIT_CN;
440 	}
441 	q->tx_class = NULL;
442 
443 	cbq_mark_toplevel(q, cl);
444 
445 #ifdef CONFIG_NET_CLS_POLICE
446 	q->rx_class = cl;
447 	cl->q->__parent = sch;
448 #endif
449 	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
450 		sch->q.qlen++;
451 		if (!cl->next_alive)
452 			cbq_activate_class(cl);
453 		return 0;
454 	}
455 	sch->stats.drops++;
456 	cl->stats.drops++;
457 	return ret;
458 }
459 
460 /* Overlimit actions */
461 
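/* Rough map of the strategies implemented below:
     TC_CBQ_OVL_CLASSIC  - put the class to sleep for its remaining
			   undertime plus offtime and arm the watchdog;
     TC_CBQ_OVL_RCLASSIC - a variant of CLASSIC that also walks the
			   borrow chain (see cbq_ovl_rclassic);
     TC_CBQ_OVL_DELAY    - park the class on the delay timer until it
			   would become underlimit again;
     TC_CBQ_OVL_LOWPRIO  - temporarily demote the class to priority2;
     TC_CBQ_OVL_DROP     - drop a packet from the class queue.
   LOWPRIO and DROP fall through to the CLASSIC behaviour as well.  */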
462 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
463 
464 static void cbq_ovl_classic(struct cbq_class *cl)
465 {
466 	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
467 	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
468 
469 	if (!cl->delayed) {
470 		delay += cl->offtime;
471 
472 		/*
473 		   The class goes to sleep, so it will have no
474 		   chance to work off its avgidle. Let's forgive it 8)
475 
476 		   BTW cbq-2.0 has a bug in this
477 		   place; apparently they forgot to shift it by cl->ewma_log.
478 		 */
479 		if (cl->avgidle < 0)
480 			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
481 		if (cl->avgidle < cl->minidle)
482 			cl->avgidle = cl->minidle;
483 		if (delay <= 0)
484 			delay = 1;
485 		PSCHED_TADD2(q->now, delay, cl->undertime);
486 
487 		cl->xstats.overactions++;
488 		cl->delayed = 1;
489 	}
490 	if (q->wd_expires == 0 || q->wd_expires > delay)
491 		q->wd_expires = delay;
492 
493 	/* Dirty work! We must schedule wakeups based on
494 	   real available rate, rather than leaf rate,
495 	   which may be tiny (even zero).
496 	 */
497 	if (q->toplevel == TC_CBQ_MAXLEVEL) {
498 		struct cbq_class *b;
499 		psched_tdiff_t base_delay = q->wd_expires;
500 
501 		for (b = cl->borrow; b; b = b->borrow) {
502 			delay = PSCHED_TDIFF(b->undertime, q->now);
503 			if (delay < base_delay) {
504 				if (delay <= 0)
505 					delay = 1;
506 				base_delay = delay;
507 			}
508 		}
509 
510 		q->wd_expires = base_delay;
511 	}
512 }
513 
514 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
515    when they go overlimit
516  */
517 
518 static void cbq_ovl_rclassic(struct cbq_class *cl)
519 {
520 	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
521 	struct cbq_class *this = cl;
522 
523 	do {
524 		if (cl->level > q->toplevel) {
525 			cl = NULL;
526 			break;
527 		}
528 	} while ((cl = cl->borrow) != NULL);
529 
530 	if (cl == NULL)
531 		cl = this;
532 	cbq_ovl_classic(cl);
533 }
534 
535 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
536 
537 static void cbq_ovl_delay(struct cbq_class *cl)
538 {
539 	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
540 	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
541 
542 	if (!cl->delayed) {
543 		unsigned long sched = jiffies;
544 
545 		delay += cl->offtime;
546 		if (cl->avgidle < 0)
547 			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
548 		if (cl->avgidle < cl->minidle)
549 			cl->avgidle = cl->minidle;
550 		PSCHED_TADD2(q->now, delay, cl->undertime);
551 
552 		if (delay > 0) {
553 			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
554 			cl->penalized = sched;
555 			cl->cpriority = TC_CBQ_MAXPRIO;
556 			q->pmask |= (1<<TC_CBQ_MAXPRIO);
557 			if (del_timer(&q->delay_timer) &&
558 			    (long)(q->delay_timer.expires - sched) > 0)
559 				q->delay_timer.expires = sched;
560 			add_timer(&q->delay_timer);
561 			cl->delayed = 1;
562 			cl->xstats.overactions++;
563 			return;
564 		}
565 		delay = 1;
566 	}
567 	if (q->wd_expires == 0 || q->wd_expires > delay)
568 		q->wd_expires = delay;
569 }
570 
571 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
572 
573 static void cbq_ovl_lowprio(struct cbq_class *cl)
574 {
575 	struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
576 
577 	cl->penalized = jiffies + cl->penalty;
578 
579 	if (cl->cpriority != cl->priority2) {
580 		cl->cpriority = cl->priority2;
581 		q->pmask |= (1<<cl->cpriority);
582 		cl->xstats.overactions++;
583 	}
584 	cbq_ovl_classic(cl);
585 }
586 
587 /* TC_CBQ_OVL_DROP: penalize class by dropping */
588 
589 static void cbq_ovl_drop(struct cbq_class *cl)
590 {
591 	if (cl->q->ops->drop)
592 		if (cl->q->ops->drop(cl->q))
593 			cl->qdisc->q.qlen--;
594 	cl->xstats.overactions++;
595 	cbq_ovl_classic(cl);
596 }
597 
598 static void cbq_watchdog(unsigned long arg)
599 {
600 	struct Qdisc *sch = (struct Qdisc*)arg;
601 
602 	sch->flags &= ~TCQ_F_THROTTLED;
603 	netif_schedule(sch->dev);
604 }
605 
606 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
607 {
608 	struct cbq_class *cl;
609 	struct cbq_class *cl_prev = q->active[prio];
610 	unsigned long now = jiffies;
611 	unsigned long sched = now;
612 
613 	if (cl_prev == NULL)
614 		return now;
615 
616 	do {
617 		cl = cl_prev->next_alive;
618 		if ((long)(now - cl->penalized) > 0) {
619 			cl_prev->next_alive = cl->next_alive;
620 			cl->next_alive = NULL;
621 			cl->cpriority = cl->priority;
622 			cl->delayed = 0;
623 			cbq_activate_class(cl);
624 
625 			if (cl == q->active[prio]) {
626 				q->active[prio] = cl_prev;
627 				if (cl == q->active[prio]) {
628 					q->active[prio] = NULL;
629 					return 0;
630 				}
631 			}
632 
633 			cl = cl_prev->next_alive;
634 		} else if ((long)(sched - cl->penalized) > 0)
635 			sched = cl->penalized;
636 	} while ((cl_prev = cl) != q->active[prio]);
637 
638 	return (long)(sched - now);
639 }
640 
641 static void cbq_undelay(unsigned long arg)
642 {
643 	struct Qdisc *sch = (struct Qdisc*)arg;
644 	struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
645 	long delay = 0;
646 	unsigned pmask;
647 
648 	pmask = q->pmask;
649 	q->pmask = 0;
650 
651 	while (pmask) {
652 		int prio = ffz(~pmask);
653 		long tmp;
654 
655 		pmask &= ~(1<<prio);
656 
657 		tmp = cbq_undelay_prio(q, prio);
658 		if (tmp > 0) {
659 			q->pmask |= 1<<prio;
660 			if (tmp < delay || delay == 0)
661 				delay = tmp;
662 		}
663 	}
664 
665 	if (delay) {
666 		q->delay_timer.expires = jiffies + delay;
667 		add_timer(&q->delay_timer);
668 	}
669 
670 	sch->flags &= ~TCQ_F_THROTTLED;
671 	netif_schedule(sch->dev);
672 }
673 
674 
675 #ifdef CONFIG_NET_CLS_POLICE
676 
677 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
678 {
679 	int len = skb->len;
680 	struct Qdisc *sch = child->__parent;
681 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
682 	struct cbq_class *cl = q->rx_class;
683 
684 	q->rx_class = NULL;
685 
686 	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
687 
688 		cbq_mark_toplevel(q, cl);
689 
690 		q->rx_class = cl;
691 		cl->q->__parent = sch;
692 
693 		if (cl->q->enqueue(skb, cl->q) == 0) {
694 			sch->q.qlen++;
695 			sch->stats.packets++;
696 			sch->stats.bytes+=len;
697 			if (!cl->next_alive)
698 				cbq_activate_class(cl);
699 			return 0;
700 		}
701 		sch->stats.drops++;
702 		return 0;
703 	}
704 
705 	sch->stats.drops++;
706 	return -1;
707 }
708 #endif
709 
710 /*
711    This is a mission-critical procedure.
712 
713    We "regenerate" the toplevel cutoff if the transmitting class
714    has backlog and is not regulated. This is not part of the
715    original CBQ description, but it looks more reasonable.
716    It may well be wrong; this question needs further investigation.
717 */
718 
719 static __inline__ void
720 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
721 		    struct cbq_class *borrowed)
722 {
723 	if (cl && q->toplevel >= borrowed->level) {
724 		if (cl->q->q.qlen > 1) {
725 			do {
726 				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
727 					q->toplevel = borrowed->level;
728 					return;
729 				}
730 			} while ((borrowed=borrowed->borrow) != NULL);
731 		}
732 #if 0
733 	/* It is not necessary now. Uncommenting it
734 	   will save CPU cycles, but decrease fairness.
735 	 */
736 		q->toplevel = TC_CBQ_MAXLEVEL;
737 #endif
738 	}
739 }
740 
741 static void
742 cbq_update(struct cbq_sched_data *q)
743 {
744 	struct cbq_class *this = q->tx_class;
745 	struct cbq_class *cl = this;
746 	int len = q->tx_len;
747 
748 	q->tx_class = NULL;
749 
750 	for ( ; cl; cl = cl->share) {
751 		long avgidle = cl->avgidle;
752 		long idle;
753 
754 		cl->stats.packets++;
755 		cl->stats.bytes += len;
756 
757 		/*
758 		   (now - last) is total time between packet right edges.
759 		   (last_pktlen/rate) is "virtual" busy time, so that
760 
761 		         idle = (now - last) - last_pktlen/rate
762 		 */
763 
764 		idle = PSCHED_TDIFF(q->now, cl->last);
765 		if ((unsigned long)idle > 128*1024*1024) {
766 			avgidle = cl->maxidle;
767 		} else {
768 			idle -= L2T(cl, len);
769 
770 		/* true_avgidle := (1-W)*true_avgidle + W*idle,
771 		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
772 		   cl->avgidle == true_avgidle/W,
773 		   hence:
774 		 */
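		/* Worked example with assumed values: if ewma_log == 5
		   (W == 1/32), a stored avgidle of 3200 and a new idle
		   sample of 160 give
			avgidle += 160 - (3200 >> 5),
		   i.e. avgidle becomes 3260; the true (unscaled) average
		   moves from 100 to 101.875, exactly 1/32 of the way
		   towards the new sample, as the recurrence requires.  */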
775 			avgidle += idle - (avgidle>>cl->ewma_log);
776 		}
777 
778 		if (avgidle <= 0) {
779 			/* Overlimit or at-limit */
780 
781 			if (avgidle < cl->minidle)
782 				avgidle = cl->minidle;
783 
784 			cl->avgidle = avgidle;
785 
786 			/* Calculate the expected time when this class
787 			   will be allowed to send.
788 			   It will occur when:
789 			   (1-W)*true_avgidle + W*idle = 0, i.e.
790 			   idle = (1/W - 1)*(-true_avgidle)
791 			   or
792 			   idle = (1 - W)*(-cl->avgidle);
793 			 */
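			/* Since W == 2^(-ewma_log), the expression
			   (1 - W)*(-cl->avgidle) is computed below as
			   (-avgidle) - ((-avgidle) >> ewma_log).  */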
794 			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
795 
796 			/*
797 			   That is not all.
798 			   To maintain the rate allocated to the class,
799 			   we add to undertime the virtual clock time
800 			   necessary to complete the transmitted packet.
801 			   (len/phys_bandwidth has already passed by
802 			   the moment cbq_update is called.)
803 			 */
804 
805 			idle -= L2T(&q->link, len);
806 			idle += L2T(cl, len);
807 
808 			PSCHED_AUDIT_TDIFF(idle);
809 
810 			PSCHED_TADD2(q->now, idle, cl->undertime);
811 		} else {
812 			/* Underlimit */
813 
814 			PSCHED_SET_PASTPERFECT(cl->undertime);
815 			if (avgidle > cl->maxidle)
816 				cl->avgidle = cl->maxidle;
817 			else
818 				cl->avgidle = avgidle;
819 		}
820 		cl->last = q->now;
821 	}
822 
823 	cbq_update_toplevel(q, this, q->tx_borrowed);
824 }
825 
826 static __inline__ struct cbq_class *
827 cbq_under_limit(struct cbq_class *cl)
828 {
829 	struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
830 	struct cbq_class *this_cl = cl;
831 
832 	if (cl->tparent == NULL)
833 		return cl;
834 
835 	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
836 	    !PSCHED_TLESS(q->now, cl->undertime)) {
837 		cl->delayed = 0;
838 		return cl;
839 	}
840 
841 	do {
842 		/* This is a very suspicious place. Currently the overlimit
843 		   action is generated for non-bounded classes
844 		   only if the link is completely congested.
845 		   Though this agrees with the ancestor-only paradigm,
846 		   it looks very stupid. In particular,
847 		   it means that this chunk of code will either
848 		   never be called or will strongly amplify
849 		   burstiness. Dangerous and silly, but
850 		   no other solution exists.
851 		 */
852 		if ((cl = cl->borrow) == NULL) {
853 			this_cl->stats.overlimits++;
854 			this_cl->overlimit(this_cl);
855 			return NULL;
856 		}
857 		if (cl->level > q->toplevel)
858 			return NULL;
859 	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
860 		 PSCHED_TLESS(q->now, cl->undertime));
861 
862 	cl->delayed = 0;
863 	return cl;
864 }
865 
866 static __inline__ struct sk_buff *
867 cbq_dequeue_prio(struct Qdisc *sch, int prio)
868 {
869 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
870 	struct cbq_class *cl_tail, *cl_prev, *cl;
871 	struct sk_buff *skb;
872 	int deficit;
873 
874 	cl_tail = cl_prev = q->active[prio];
875 	cl = cl_prev->next_alive;
876 
877 	do {
878 		deficit = 0;
879 
880 		/* Start round */
881 		do {
882 			struct cbq_class *borrow = cl;
883 
884 			if (cl->q->q.qlen &&
885 			    (borrow = cbq_under_limit(cl)) == NULL)
886 				goto skip_class;
887 
888 			if (cl->deficit <= 0) {
889 				/* Class exhausted its allotment per
890 				   this round. Switch to the next one.
891 				 */
892 				deficit = 1;
893 				cl->deficit += cl->quantum;
894 				goto next_class;
895 			}
896 
897 			skb = cl->q->dequeue(cl->q);
898 
899 			/* The class did not give us any skb :-(
900 			   This can occur even if cl->q->q.qlen != 0,
901 			   e.g. if cl->q is a "tbf"
902 			 */
903 			if (skb == NULL)
904 				goto skip_class;
905 
906 			cl->deficit -= skb->len;
907 			q->tx_class = cl;
908 			q->tx_borrowed = borrow;
909 			if (borrow != cl) {
910 #ifndef CBQ_XSTATS_BORROWS_BYTES
911 				borrow->xstats.borrows++;
912 				cl->xstats.borrows++;
913 #else
914 				borrow->xstats.borrows += skb->len;
915 				cl->xstats.borrows += skb->len;
916 #endif
917 			}
918 			q->tx_len = skb->len;
919 
920 			if (cl->deficit <= 0) {
921 				q->active[prio] = cl;
922 				cl = cl->next_alive;
923 				cl->deficit += cl->quantum;
924 			}
925 			return skb;
926 
927 skip_class:
928 			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
929 				/* Class is empty or penalized.
930 				   Unlink it from active chain.
931 				 */
932 				cl_prev->next_alive = cl->next_alive;
933 				cl->next_alive = NULL;
934 
935 				/* Did cl_tail point to it? */
936 				if (cl == cl_tail) {
937 					/* Repair it! */
938 					cl_tail = cl_prev;
939 
940 					/* Was it the last class in this band? */
941 					if (cl == cl_tail) {
942 						/* Kill the band! */
943 						q->active[prio] = NULL;
944 						q->activemask &= ~(1<<prio);
945 						if (cl->q->q.qlen)
946 							cbq_activate_class(cl);
947 						return NULL;
948 					}
949 
950 					q->active[prio] = cl_tail;
951 				}
952 				if (cl->q->q.qlen)
953 					cbq_activate_class(cl);
954 
955 				cl = cl_prev;
956 			}
957 
958 next_class:
959 			cl_prev = cl;
960 			cl = cl->next_alive;
961 		} while (cl_prev != cl_tail);
962 	} while (deficit);
963 
964 	q->active[prio] = cl_prev;
965 
966 	return NULL;
967 }
968 
969 static __inline__ struct sk_buff *
970 cbq_dequeue_1(struct Qdisc *sch)
971 {
972 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
973 	struct sk_buff *skb;
974 	unsigned activemask;
975 
976 	activemask = q->activemask&0xFF;
977 	while (activemask) {
978 		int prio = ffz(~activemask);
979 		activemask &= ~(1<<prio);
980 		skb = cbq_dequeue_prio(sch, prio);
981 		if (skb)
982 			return skb;
983 	}
984 	return NULL;
985 }
986 
987 static struct sk_buff *
988 cbq_dequeue(struct Qdisc *sch)
989 {
990 	struct sk_buff *skb;
991 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
992 	psched_time_t now;
993 	psched_tdiff_t incr;
994 
995 	PSCHED_GET_TIME(now);
996 	incr = PSCHED_TDIFF(now, q->now_rt);
997 
998 	if (q->tx_class) {
999 		psched_tdiff_t incr2;
1000 		/* Time integrator. We calculate the EOS (end of service) time
1001 		   by adding the expected packet transmission time.
1002 		   If the real time is greater, we warp the artificial clock,
1003 		   so that:
1004 
1005 		   cbq_time = max(real_time, work);
1006 		 */
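		/* Illustration: if the last packet should have taken incr2
		   ticks at the configured link rate but the device actually
		   needed longer (incr > incr2), q->now advances by the real
		   increment; if the device was faster, q->now still advances
		   by incr2, so the artificial clock never falls behind the
		   idealized schedule.  */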
1007 		incr2 = L2T(&q->link, q->tx_len);
1008 		PSCHED_TADD(q->now, incr2);
1009 		cbq_update(q);
1010 		if ((incr -= incr2) < 0)
1011 			incr = 0;
1012 	}
1013 	PSCHED_TADD(q->now, incr);
1014 	q->now_rt = now;
1015 
1016 	for (;;) {
1017 		q->wd_expires = 0;
1018 
1019 		skb = cbq_dequeue_1(sch);
1020 		if (skb) {
1021 			sch->q.qlen--;
1022 			sch->flags &= ~TCQ_F_THROTTLED;
1023 			return skb;
1024 		}
1025 
1026 		/* All the classes are overlimit.
1027 
1028 		   It is possible, if:
1029 
1030 		   1. Scheduler is empty.
1031 		   2. Toplevel cutoff inhibited borrowing.
1032 		   3. Root class is overlimit.
1033 
1034 		   Reset the 2nd and 3rd conditions and retry.
1035 
1036 		   Note that NS and cbq-2.0 are buggy: picking
1037 		   an arbitrary class is appropriate for ancestor-only
1038 		   sharing, but not for the top-level algorithm.
1039 
1040 		   Our version is better but slower, because it requires
1041 		   two passes; this is unavoidable with top-level sharing.
1042 		*/
1043 
1044 		if (q->toplevel == TC_CBQ_MAXLEVEL &&
1045 		    PSCHED_IS_PASTPERFECT(q->link.undertime))
1046 			break;
1047 
1048 		q->toplevel = TC_CBQ_MAXLEVEL;
1049 		PSCHED_SET_PASTPERFECT(q->link.undertime);
1050 	}
1051 
1052 	/* No packets in the scheduler, or nobody wants to give them to us :-(
1053 	   Sigh... start the watchdog timer in the latter case. */
1054 
1055 	if (sch->q.qlen) {
1056 		sch->stats.overlimits++;
1057 		if (q->wd_expires) {
1058 			long delay = PSCHED_US2JIFFIE(q->wd_expires);
1059 			if (delay <= 0)
1060 				delay = 1;
1061 			mod_timer(&q->wd_timer, jiffies + delay);
1062 			sch->flags |= TCQ_F_THROTTLED;
1063 		}
1064 	}
1065 	return NULL;
1066 }
1067 
1068 /* CBQ class maintenance routines */
1069 
1070 static void cbq_adjust_levels(struct cbq_class *this)
1071 {
1072 	if (this == NULL)
1073 		return;
1074 
1075 	do {
1076 		int level = 0;
1077 		struct cbq_class *cl;
1078 
1079 		if ((cl = this->children) != NULL) {
1080 			do {
1081 				if (cl->level > level)
1082 					level = cl->level;
1083 			} while ((cl = cl->sibling) != this->children);
1084 		}
1085 		this->level = level+1;
1086 	} while ((this = this->tparent) != NULL);
1087 }
1088 
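/* Recompute the per-round quantum of every class in a priority band so
   that the quanta stay proportional to the configured weights while
   their average stays close to the configured allot. Made-up example:
   two classes in one band, both with allot == 1500, with weights 1 and 3,
   end up with quanta of 1500*1*2/4 == 750 and 1500*3*2/4 == 2250 bytes.  */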
1089 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1090 {
1091 	struct cbq_class *cl;
1092 	unsigned h;
1093 
1094 	if (q->quanta[prio] == 0)
1095 		return;
1096 
1097 	for (h=0; h<16; h++) {
1098 		for (cl = q->classes[h]; cl; cl = cl->next) {
1099 			/* BUGGGG... Beware! This expression suffers from
1100 			   arithmetic overflows!
1101 			 */
1102 			if (cl->priority == prio) {
1103 				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1104 					q->quanta[prio];
1105 			}
1106 			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1107 				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1108 				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1109 			}
1110 		}
1111 	}
1112 }
1113 
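/* Rebuild the defaults[] table of the split node this class is attached
   to: every priority slot that pointed at this class but is no longer
   covered by its defmap is cleared, and each empty slot is then refilled
   with the lowest-level class under the same split whose defmap covers
   that priority.  */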
1114 static void cbq_sync_defmap(struct cbq_class *cl)
1115 {
1116 	struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
1117 	struct cbq_class *split = cl->split;
1118 	unsigned h;
1119 	int i;
1120 
1121 	if (split == NULL)
1122 		return;
1123 
1124 	for (i=0; i<=TC_PRIO_MAX; i++) {
1125 		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1126 			split->defaults[i] = NULL;
1127 	}
1128 
1129 	for (i=0; i<=TC_PRIO_MAX; i++) {
1130 		int level = split->level;
1131 
1132 		if (split->defaults[i])
1133 			continue;
1134 
1135 		for (h=0; h<16; h++) {
1136 			struct cbq_class *c;
1137 
1138 			for (c = q->classes[h]; c; c = c->next) {
1139 				if (c->split == split && c->level < level &&
1140 				    c->defmap&(1<<i)) {
1141 					split->defaults[i] = c;
1142 					level = c->level;
1143 				}
1144 			}
1145 		}
1146 	}
1147 }
1148 
1149 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1150 {
1151 	struct cbq_class *split = NULL;
1152 
1153 	if (splitid == 0) {
1154 		if ((split = cl->split) == NULL)
1155 			return;
1156 		splitid = split->classid;
1157 	}
1158 
1159 	if (split == NULL || split->classid != splitid) {
1160 		for (split = cl->tparent; split; split = split->tparent)
1161 			if (split->classid == splitid)
1162 				break;
1163 	}
1164 
1165 	if (split == NULL)
1166 		return;
1167 
1168 	if (cl->split != split) {
1169 		cl->defmap = 0;
1170 		cbq_sync_defmap(cl);
1171 		cl->split = split;
1172 		cl->defmap = def&mask;
1173 	} else
1174 		cl->defmap = (cl->defmap&~mask)|(def&mask);
1175 
1176 	cbq_sync_defmap(cl);
1177 }
1178 
1179 static void cbq_unlink_class(struct cbq_class *this)
1180 {
1181 	struct cbq_class *cl, **clp;
1182 	struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1183 
1184 	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1185 		if (cl == this) {
1186 			*clp = cl->next;
1187 			cl->next = NULL;
1188 			break;
1189 		}
1190 	}
1191 
1192 	if (this->tparent) {
1193 		clp=&this->sibling;
1194 		cl = *clp;
1195 		do {
1196 			if (cl == this) {
1197 				*clp = cl->sibling;
1198 				break;
1199 			}
1200 			clp = &cl->sibling;
1201 		} while ((cl = *clp) != this->sibling);
1202 
1203 		if (this->tparent->children == this) {
1204 			this->tparent->children = this->sibling;
1205 			if (this->sibling == this)
1206 				this->tparent->children = NULL;
1207 		}
1208 	} else {
1209 		BUG_TRAP(this->sibling == this);
1210 	}
1211 }
1212 
1213 static void cbq_link_class(struct cbq_class *this)
1214 {
1215 	struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1216 	unsigned h = cbq_hash(this->classid);
1217 	struct cbq_class *parent = this->tparent;
1218 
1219 	this->sibling = this;
1220 	this->next = q->classes[h];
1221 	q->classes[h] = this;
1222 
1223 	if (parent == NULL)
1224 		return;
1225 
1226 	if (parent->children == NULL) {
1227 		parent->children = this;
1228 	} else {
1229 		this->sibling = parent->children->sibling;
1230 		parent->children->sibling = this;
1231 	}
1232 }
1233 
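/* Reclaim space by dropping a single packet, preferring the band that is
   served last: the active bands are scanned from TC_CBQ_MAXPRIO down to 0
   and the first class whose inner qdisc implements drop gives up a packet.  */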
1234 static unsigned int cbq_drop(struct Qdisc* sch)
1235 {
1236 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1237 	struct cbq_class *cl, *cl_head;
1238 	int prio;
1239 	unsigned int len;
1240 
1241 	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1242 		if ((cl_head = q->active[prio]) == NULL)
1243 			continue;
1244 
1245 		cl = cl_head;
1246 		do {
1247 			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1248 				sch->q.qlen--;
1249 				return len;
1250 			}
1251 		} while ((cl = cl->next_alive) != cl_head);
1252 	}
1253 	return 0;
1254 }
1255 
1256 static void
1257 cbq_reset(struct Qdisc* sch)
1258 {
1259 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1260 	struct cbq_class *cl;
1261 	int prio;
1262 	unsigned h;
1263 
1264 	q->activemask = 0;
1265 	q->pmask = 0;
1266 	q->tx_class = NULL;
1267 	q->tx_borrowed = NULL;
1268 	del_timer(&q->wd_timer);
1269 	del_timer(&q->delay_timer);
1270 	q->toplevel = TC_CBQ_MAXLEVEL;
1271 	PSCHED_GET_TIME(q->now);
1272 	q->now_rt = q->now;
1273 
1274 	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1275 		q->active[prio] = NULL;
1276 
1277 	for (h = 0; h < 16; h++) {
1278 		for (cl = q->classes[h]; cl; cl = cl->next) {
1279 			qdisc_reset(cl->q);
1280 
1281 			cl->next_alive = NULL;
1282 			PSCHED_SET_PASTPERFECT(cl->undertime);
1283 			cl->avgidle = cl->maxidle;
1284 			cl->deficit = cl->quantum;
1285 			cl->cpriority = cl->priority;
1286 		}
1287 	}
1288 	sch->q.qlen = 0;
1289 }
1290 
1291 
1292 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1293 {
1294 	if (lss->change&TCF_CBQ_LSS_FLAGS) {
1295 		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1296 		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1297 	}
1298 	if (lss->change&TCF_CBQ_LSS_EWMA)
1299 		cl->ewma_log = lss->ewma_log;
1300 	if (lss->change&TCF_CBQ_LSS_AVPKT)
1301 		cl->avpkt = lss->avpkt;
1302 	if (lss->change&TCF_CBQ_LSS_MINIDLE)
1303 		cl->minidle = -(long)lss->minidle;
1304 	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1305 		cl->maxidle = lss->maxidle;
1306 		cl->avgidle = lss->maxidle;
1307 	}
1308 	if (lss->change&TCF_CBQ_LSS_OFFTIME)
1309 		cl->offtime = lss->offtime;
1310 	return 0;
1311 }
1312 
1313 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1314 {
1315 	q->nclasses[cl->priority]--;
1316 	q->quanta[cl->priority] -= cl->weight;
1317 	cbq_normalize_quanta(q, cl->priority);
1318 }
1319 
1320 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1321 {
1322 	q->nclasses[cl->priority]++;
1323 	q->quanta[cl->priority] += cl->weight;
1324 	cbq_normalize_quanta(q, cl->priority);
1325 }
1326 
1327 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1328 {
1329 	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
1330 
1331 	if (wrr->allot)
1332 		cl->allot = wrr->allot;
1333 	if (wrr->weight)
1334 		cl->weight = wrr->weight;
1335 	if (wrr->priority) {
1336 		cl->priority = wrr->priority-1;
1337 		cl->cpriority = cl->priority;
1338 		if (cl->priority >= cl->priority2)
1339 			cl->priority2 = TC_CBQ_MAXPRIO-1;
1340 	}
1341 
1342 	cbq_addprio(q, cl);
1343 	return 0;
1344 }
1345 
1346 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1347 {
1348 	switch (ovl->strategy) {
1349 	case TC_CBQ_OVL_CLASSIC:
1350 		cl->overlimit = cbq_ovl_classic;
1351 		break;
1352 	case TC_CBQ_OVL_DELAY:
1353 		cl->overlimit = cbq_ovl_delay;
1354 		break;
1355 	case TC_CBQ_OVL_LOWPRIO:
1356 		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1357 		    ovl->priority2-1 <= cl->priority)
1358 			return -EINVAL;
1359 		cl->priority2 = ovl->priority2-1;
1360 		cl->overlimit = cbq_ovl_lowprio;
1361 		break;
1362 	case TC_CBQ_OVL_DROP:
1363 		cl->overlimit = cbq_ovl_drop;
1364 		break;
1365 	case TC_CBQ_OVL_RCLASSIC:
1366 		cl->overlimit = cbq_ovl_rclassic;
1367 		break;
1368 	default:
1369 		return -EINVAL;
1370 	}
1371 	cl->penalty = (ovl->penalty*HZ)/1000;
1372 	return 0;
1373 }
1374 
1375 #ifdef CONFIG_NET_CLS_POLICE
1376 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1377 {
1378 	cl->police = p->police;
1379 
1380 	if (cl->q->handle) {
1381 		if (p->police == TC_POLICE_RECLASSIFY)
1382 			cl->q->reshape_fail = cbq_reshape_fail;
1383 		else
1384 			cl->q->reshape_fail = NULL;
1385 	}
1386 	return 0;
1387 }
1388 #endif
1389 
1390 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1391 {
1392 	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1393 	return 0;
1394 }
1395 
1396 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1397 {
1398 	struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1399 	struct rtattr *tb[TCA_CBQ_MAX];
1400 	struct tc_ratespec *r;
1401 
1402 	if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
1403 	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1404 	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1405 		return -EINVAL;
1406 
1407 	if (tb[TCA_CBQ_LSSOPT-1] &&
1408 	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1409 		return -EINVAL;
1410 
1411 	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1412 
1413 	MOD_INC_USE_COUNT;
1414 	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL) {
1415 		MOD_DEC_USE_COUNT;
1416 		return -EINVAL;
1417 	}
1418 
1419 	q->link.refcnt = 1;
1420 	q->link.sibling = &q->link;
1421 	q->link.classid = sch->handle;
1422 	q->link.qdisc = sch;
1423 	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1424 		q->link.q = &noop_qdisc;
1425 
1426 	q->link.priority = TC_CBQ_MAXPRIO-1;
1427 	q->link.priority2 = TC_CBQ_MAXPRIO-1;
1428 	q->link.cpriority = TC_CBQ_MAXPRIO-1;
1429 	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1430 	q->link.overlimit = cbq_ovl_classic;
1431 	q->link.allot = psched_mtu(sch->dev);
1432 	q->link.quantum = q->link.allot;
1433 	q->link.weight = q->link.R_tab->rate.rate;
1434 
1435 	q->link.ewma_log = TC_CBQ_DEF_EWMA;
1436 	q->link.avpkt = q->link.allot/2;
1437 	q->link.minidle = -0x7FFFFFFF;
1438 	q->link.stats.lock = &sch->dev->queue_lock;
1439 
1440 	init_timer(&q->wd_timer);
1441 	q->wd_timer.data = (unsigned long)sch;
1442 	q->wd_timer.function = cbq_watchdog;
1443 	init_timer(&q->delay_timer);
1444 	q->delay_timer.data = (unsigned long)sch;
1445 	q->delay_timer.function = cbq_undelay;
1446 	q->toplevel = TC_CBQ_MAXLEVEL;
1447 	PSCHED_GET_TIME(q->now);
1448 	q->now_rt = q->now;
1449 
1450 	cbq_link_class(&q->link);
1451 
1452 	if (tb[TCA_CBQ_LSSOPT-1])
1453 		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1454 
1455 	cbq_addprio(q, &q->link);
1456 	return 0;
1457 }
1458 
1459 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1460 {
1461 	unsigned char	 *b = skb->tail;
1462 
1463 	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1464 	return skb->len;
1465 
1466 rtattr_failure:
1467 	skb_trim(skb, b - skb->data);
1468 	return -1;
1469 }
1470 
1471 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1472 {
1473 	unsigned char	 *b = skb->tail;
1474 	struct tc_cbq_lssopt opt;
1475 
1476 	opt.flags = 0;
1477 	if (cl->borrow == NULL)
1478 		opt.flags |= TCF_CBQ_LSS_BOUNDED;
1479 	if (cl->share == NULL)
1480 		opt.flags |= TCF_CBQ_LSS_ISOLATED;
1481 	opt.ewma_log = cl->ewma_log;
1482 	opt.level = cl->level;
1483 	opt.avpkt = cl->avpkt;
1484 	opt.maxidle = cl->maxidle;
1485 	opt.minidle = (u32)(-cl->minidle);
1486 	opt.offtime = cl->offtime;
1487 	opt.change = ~0;
1488 	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1489 	return skb->len;
1490 
1491 rtattr_failure:
1492 	skb_trim(skb, b - skb->data);
1493 	return -1;
1494 }
1495 
1496 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1497 {
1498 	unsigned char	 *b = skb->tail;
1499 	struct tc_cbq_wrropt opt;
1500 
1501 	opt.flags = 0;
1502 	opt.allot = cl->allot;
1503 	opt.priority = cl->priority+1;
1504 	opt.cpriority = cl->cpriority+1;
1505 	opt.weight = cl->weight;
1506 	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1507 	return skb->len;
1508 
1509 rtattr_failure:
1510 	skb_trim(skb, b - skb->data);
1511 	return -1;
1512 }
1513 
1514 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1515 {
1516 	unsigned char	 *b = skb->tail;
1517 	struct tc_cbq_ovl opt;
1518 
1519 	opt.strategy = cl->ovl_strategy;
1520 	opt.priority2 = cl->priority2+1;
1521 	opt.pad = 0;
1522 	opt.penalty = (cl->penalty*1000)/HZ;
1523 	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1524 	return skb->len;
1525 
1526 rtattr_failure:
1527 	skb_trim(skb, b - skb->data);
1528 	return -1;
1529 }
1530 
1531 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1532 {
1533 	unsigned char	 *b = skb->tail;
1534 	struct tc_cbq_fopt opt;
1535 
1536 	if (cl->split || cl->defmap) {
1537 		opt.split = cl->split ? cl->split->classid : 0;
1538 		opt.defmap = cl->defmap;
1539 		opt.defchange = ~0;
1540 		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1541 	}
1542 	return skb->len;
1543 
1544 rtattr_failure:
1545 	skb_trim(skb, b - skb->data);
1546 	return -1;
1547 }
1548 
1549 #ifdef CONFIG_NET_CLS_POLICE
1550 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1551 {
1552 	unsigned char	 *b = skb->tail;
1553 	struct tc_cbq_police opt;
1554 
1555 	if (cl->police) {
1556 		opt.police = cl->police;
1557 		opt.__res1 = 0;
1558 		opt.__res2 = 0;
1559 		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1560 	}
1561 	return skb->len;
1562 
1563 rtattr_failure:
1564 	skb_trim(skb, b - skb->data);
1565 	return -1;
1566 }
1567 #endif
1568 
1569 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1570 {
1571 	if (cbq_dump_lss(skb, cl) < 0 ||
1572 	    cbq_dump_rate(skb, cl) < 0 ||
1573 	    cbq_dump_wrr(skb, cl) < 0 ||
1574 	    cbq_dump_ovl(skb, cl) < 0 ||
1575 #ifdef CONFIG_NET_CLS_POLICE
1576 	    cbq_dump_police(skb, cl) < 0 ||
1577 #endif
1578 	    cbq_dump_fopt(skb, cl) < 0)
1579 		return -1;
1580 	return 0;
1581 }
1582 
1583 int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
1584 {
1585 	RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
1586 	return 0;
1587 
1588 rtattr_failure:
1589 	return -1;
1590 }
1591 
1592 
1593 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1594 {
1595 	struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1596 	unsigned char	 *b = skb->tail;
1597 	struct rtattr *rta;
1598 
1599 	rta = (struct rtattr*)b;
1600 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1601 	if (cbq_dump_attr(skb, &q->link) < 0)
1602 		goto rtattr_failure;
1603 	rta->rta_len = skb->tail - b;
1604 	spin_lock_bh(&sch->dev->queue_lock);
1605 	q->link.xstats.avgidle = q->link.avgidle;
1606 	if (cbq_copy_xstats(skb, &q->link.xstats)) {
1607 		spin_unlock_bh(&sch->dev->queue_lock);
1608 		goto rtattr_failure;
1609 	}
1610 	spin_unlock_bh(&sch->dev->queue_lock);
1611 	return skb->len;
1612 
1613 rtattr_failure:
1614 	skb_trim(skb, b - skb->data);
1615 	return -1;
1616 }
1617 
1618 static int
1619 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1620 	       struct sk_buff *skb, struct tcmsg *tcm)
1621 {
1622 	struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1623 	struct cbq_class *cl = (struct cbq_class*)arg;
1624 	unsigned char	 *b = skb->tail;
1625 	struct rtattr *rta;
1626 
1627 	if (cl->tparent)
1628 		tcm->tcm_parent = cl->tparent->classid;
1629 	else
1630 		tcm->tcm_parent = TC_H_ROOT;
1631 	tcm->tcm_handle = cl->classid;
1632 	tcm->tcm_info = cl->q->handle;
1633 
1634 	rta = (struct rtattr*)b;
1635 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1636 	if (cbq_dump_attr(skb, cl) < 0)
1637 		goto rtattr_failure;
1638 	rta->rta_len = skb->tail - b;
1639 	cl->stats.qlen = cl->q->q.qlen;
1640 	if (qdisc_copy_stats(skb, &cl->stats))
1641 		goto rtattr_failure;
1642 	spin_lock_bh(&sch->dev->queue_lock);
1643 	cl->xstats.avgidle = cl->avgidle;
1644 	cl->xstats.undertime = 0;
1645 	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1646 		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1647 	if (cbq_copy_xstats(skb, &cl->xstats)) {
1648 		spin_unlock_bh(&sch->dev->queue_lock);
1649 		goto rtattr_failure;
1650 	}
1651 	spin_unlock_bh(&sch->dev->queue_lock);
1652 
1653 	return skb->len;
1654 
1655 rtattr_failure:
1656 	skb_trim(skb, b - skb->data);
1657 	return -1;
1658 }
1659 
1660 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1661 		     struct Qdisc **old)
1662 {
1663 	struct cbq_class *cl = (struct cbq_class*)arg;
1664 
1665 	if (cl) {
1666 		if (new == NULL) {
1667 			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1668 				return -ENOBUFS;
1669 		} else {
1670 #ifdef CONFIG_NET_CLS_POLICE
1671 			if (cl->police == TC_POLICE_RECLASSIFY)
1672 				new->reshape_fail = cbq_reshape_fail;
1673 #endif
1674 		}
1675 		sch_tree_lock(sch);
1676 		*old = cl->q;
1677 		cl->q = new;
1678 		sch->q.qlen -= (*old)->q.qlen;
1679 		qdisc_reset(*old);
1680 		sch_tree_unlock(sch);
1681 
1682 		return 0;
1683 	}
1684 	return -ENOENT;
1685 }
1686 
1687 static struct Qdisc *
1688 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1689 {
1690 	struct cbq_class *cl = (struct cbq_class*)arg;
1691 
1692 	return cl ? cl->q : NULL;
1693 }
1694 
1695 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1696 {
1697 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1698 	struct cbq_class *cl = cbq_class_lookup(q, classid);
1699 
1700 	if (cl) {
1701 		cl->refcnt++;
1702 		return (unsigned long)cl;
1703 	}
1704 	return 0;
1705 }
1706 
1707 static void cbq_destroy_filters(struct cbq_class *cl)
1708 {
1709 	struct tcf_proto *tp;
1710 
1711 	while ((tp = cl->filter_list) != NULL) {
1712 		cl->filter_list = tp->next;
1713 		tcf_destroy(tp);
1714 	}
1715 }
1716 
1717 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1718 {
1719 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1720 
1721 	BUG_TRAP(!cl->filters);
1722 
1723 	cbq_destroy_filters(cl);
1724 	qdisc_destroy(cl->q);
1725 	qdisc_put_rtab(cl->R_tab);
1726 #ifdef CONFIG_NET_ESTIMATOR
1727 	qdisc_kill_estimator(&cl->stats);
1728 #endif
1729 	if (cl != &q->link)
1730 		kfree(cl);
1731 }
1732 
1733 static void
1734 cbq_destroy(struct Qdisc* sch)
1735 {
1736 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1737 	struct cbq_class *cl;
1738 	unsigned h;
1739 
1740 #ifdef CONFIG_NET_CLS_POLICE
1741 	q->rx_class = NULL;
1742 #endif
1743 	/*
1744 	 * Filters must be destroyed first because we don't destroy the
1745 	 * classes from root to leaves, which means that filters can still
1746 	 * be bound to classes which have been destroyed already. --TGR '04
1747 	 */
1748 	for (h = 0; h < 16; h++)
1749 		for (cl = q->classes[h]; cl; cl = cl->next)
1750 			cbq_destroy_filters(cl);
1751 
1752 	for (h = 0; h < 16; h++) {
1753 		struct cbq_class *next;
1754 
1755 		for (cl = q->classes[h]; cl; cl = next) {
1756 			next = cl->next;
1757 			cbq_destroy_class(sch, cl);
1758 		}
1759 	}
1760 
1761 	MOD_DEC_USE_COUNT;
1762 }
1763 
1764 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1765 {
1766 	struct cbq_class *cl = (struct cbq_class*)arg;
1767 
1768 	if (--cl->refcnt == 0) {
1769 #ifdef CONFIG_NET_CLS_POLICE
1770 		struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1771 
1772 		spin_lock_bh(&sch->dev->queue_lock);
1773 		if (q->rx_class == cl)
1774 			q->rx_class = NULL;
1775 		spin_unlock_bh(&sch->dev->queue_lock);
1776 #endif
1777 
1778 		cbq_destroy_class(sch, cl);
1779 	}
1780 }
1781 
1782 static int
1783 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1784 		 unsigned long *arg)
1785 {
1786 	int err;
1787 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1788 	struct cbq_class *cl = (struct cbq_class*)*arg;
1789 	struct rtattr *opt = tca[TCA_OPTIONS-1];
1790 	struct rtattr *tb[TCA_CBQ_MAX];
1791 	struct cbq_class *parent;
1792 	struct qdisc_rate_table *rtab = NULL;
1793 
1794 	if (opt==NULL ||
1795 	    rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1796 		return -EINVAL;
1797 
1798 	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1799 	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1800 		return -EINVAL;
1801 
1802 	if (tb[TCA_CBQ_FOPT-1] &&
1803 	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1804 		return -EINVAL;
1805 
1806 	if (tb[TCA_CBQ_RATE-1] &&
1807 	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1808 			return -EINVAL;
1809 
1810 	if (tb[TCA_CBQ_LSSOPT-1] &&
1811 	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1812 			return -EINVAL;
1813 
1814 	if (tb[TCA_CBQ_WRROPT-1] &&
1815 	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1816 			return -EINVAL;
1817 
1818 #ifdef CONFIG_NET_CLS_POLICE
1819 	if (tb[TCA_CBQ_POLICE-1] &&
1820 	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1821 			return -EINVAL;
1822 #endif
1823 
1824 	if (cl) {
1825 		/* Check parent */
1826 		if (parentid) {
1827 			if (cl->tparent && cl->tparent->classid != parentid)
1828 				return -EINVAL;
1829 			if (!cl->tparent && parentid != TC_H_ROOT)
1830 				return -EINVAL;
1831 		}
1832 
1833 		if (tb[TCA_CBQ_RATE-1]) {
1834 			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1835 			if (rtab == NULL)
1836 				return -EINVAL;
1837 		}
1838 
1839 		/* Change class parameters */
1840 		sch_tree_lock(sch);
1841 
1842 		if (cl->next_alive != NULL)
1843 			cbq_deactivate_class(cl);
1844 
1845 		if (rtab) {
1846 			rtab = xchg(&cl->R_tab, rtab);
1847 			qdisc_put_rtab(rtab);
1848 		}
1849 
1850 		if (tb[TCA_CBQ_LSSOPT-1])
1851 			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1852 
1853 		if (tb[TCA_CBQ_WRROPT-1]) {
1854 			cbq_rmprio(q, cl);
1855 			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1856 		}
1857 
1858 		if (tb[TCA_CBQ_OVL_STRATEGY-1])
1859 			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1860 
1861 #ifdef CONFIG_NET_CLS_POLICE
1862 		if (tb[TCA_CBQ_POLICE-1])
1863 			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1864 #endif
1865 
1866 		if (tb[TCA_CBQ_FOPT-1])
1867 			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1868 
1869 		if (cl->q->q.qlen)
1870 			cbq_activate_class(cl);
1871 
1872 		sch_tree_unlock(sch);
1873 
1874 #ifdef CONFIG_NET_ESTIMATOR
1875 		if (tca[TCA_RATE-1]) {
1876 			qdisc_kill_estimator(&cl->stats);
1877 			qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1878 		}
1879 #endif
1880 		return 0;
1881 	}
1882 
1883 	if (parentid == TC_H_ROOT)
1884 		return -EINVAL;
1885 
1886 	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1887 	    tb[TCA_CBQ_LSSOPT-1] == NULL)
1888 		return -EINVAL;
1889 
1890 	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1891 	if (rtab == NULL)
1892 		return -EINVAL;
1893 
1894 	if (classid) {
1895 		err = -EINVAL;
1896 		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1897 			goto failure;
1898 	} else {
1899 		int i;
1900 		classid = TC_H_MAKE(sch->handle,0x8000);
1901 
1902 		for (i=0; i<0x8000; i++) {
1903 			if (++q->hgenerator >= 0x8000)
1904 				q->hgenerator = 1;
1905 			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1906 				break;
1907 		}
1908 		err = -ENOSR;
1909 		if (i >= 0x8000)
1910 			goto failure;
1911 		classid = classid|q->hgenerator;
1912 	}
1913 
1914 	parent = &q->link;
1915 	if (parentid) {
1916 		parent = cbq_class_lookup(q, parentid);
1917 		err = -EINVAL;
1918 		if (parent == NULL)
1919 			goto failure;
1920 	}
1921 
1922 	err = -ENOBUFS;
1923 	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1924 	if (cl == NULL)
1925 		goto failure;
1926 	memset(cl, 0, sizeof(*cl));
1927 	cl->R_tab = rtab;
1928 	rtab = NULL;
1929 	cl->refcnt = 1;
1930 	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1931 		cl->q = &noop_qdisc;
1932 	cl->classid = classid;
1933 	cl->tparent = parent;
1934 	cl->qdisc = sch;
1935 	cl->allot = parent->allot;
1936 	cl->quantum = cl->allot;
1937 	cl->weight = cl->R_tab->rate.rate;
1938 	cl->stats.lock = &sch->dev->queue_lock;
1939 
1940 	sch_tree_lock(sch);
1941 	cbq_link_class(cl);
1942 	cl->borrow = cl->tparent;
1943 	if (cl->tparent != &q->link)
1944 		cl->share = cl->tparent;
1945 	cbq_adjust_levels(parent);
1946 	cl->minidle = -0x7FFFFFFF;
1947 	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1948 	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1949 	if (cl->ewma_log==0)
1950 		cl->ewma_log = q->link.ewma_log;
1951 	if (cl->maxidle==0)
1952 		cl->maxidle = q->link.maxidle;
1953 	if (cl->avpkt==0)
1954 		cl->avpkt = q->link.avpkt;
1955 	cl->overlimit = cbq_ovl_classic;
1956 	if (tb[TCA_CBQ_OVL_STRATEGY-1])
1957 		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1958 #ifdef CONFIG_NET_CLS_POLICE
1959 	if (tb[TCA_CBQ_POLICE-1])
1960 		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1961 #endif
1962 	if (tb[TCA_CBQ_FOPT-1])
1963 		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1964 	sch_tree_unlock(sch);
1965 
1966 #ifdef CONFIG_NET_ESTIMATOR
1967 	if (tca[TCA_RATE-1])
1968 		qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1969 #endif
1970 
1971 	*arg = (unsigned long)cl;
1972 	return 0;
1973 
1974 failure:
1975 	qdisc_put_rtab(rtab);
1976 	return err;
1977 }
1978 
1979 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1980 {
1981 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1982 	struct cbq_class *cl = (struct cbq_class*)arg;
1983 
1984 	if (cl->filters || cl->children || cl == &q->link)
1985 		return -EBUSY;
1986 
1987 	sch_tree_lock(sch);
1988 
1989 	if (cl->next_alive)
1990 		cbq_deactivate_class(cl);
1991 
1992 	if (q->tx_borrowed == cl)
1993 		q->tx_borrowed = q->tx_class;
1994 	if (q->tx_class == cl) {
1995 		q->tx_class = NULL;
1996 		q->tx_borrowed = NULL;
1997 	}
1998 #ifdef CONFIG_NET_CLS_POLICE
1999 	if (q->rx_class == cl)
2000 		q->rx_class = NULL;
2001 #endif
2002 
2003 	cbq_unlink_class(cl);
2004 	cbq_adjust_levels(cl->tparent);
2005 	cl->defmap = 0;
2006 	cbq_sync_defmap(cl);
2007 
2008 	cbq_rmprio(q, cl);
2009 	sch_tree_unlock(sch);
2010 
2011 	if (--cl->refcnt == 0)
2012 		cbq_destroy_class(sch, cl);
2013 
2014 	return 0;
2015 }
2016 
2017 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2018 {
2019 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2020 	struct cbq_class *cl = (struct cbq_class *)arg;
2021 
2022 	if (cl == NULL)
2023 		cl = &q->link;
2024 
2025 	return &cl->filter_list;
2026 }
2027 
2028 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2029 				     u32 classid)
2030 {
2031 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2032 	struct cbq_class *p = (struct cbq_class*)parent;
2033 	struct cbq_class *cl = cbq_class_lookup(q, classid);
2034 
2035 	if (cl) {
2036 		if (p && p->level <= cl->level)
2037 			return 0;
2038 		cl->filters++;
2039 		return (unsigned long)cl;
2040 	}
2041 	return 0;
2042 }
2043 
2044 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2045 {
2046 	struct cbq_class *cl = (struct cbq_class*)arg;
2047 
2048 	cl->filters--;
2049 }
2050 
2051 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2052 {
2053 	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2054 	unsigned h;
2055 
2056 	if (arg->stop)
2057 		return;
2058 
2059 	for (h = 0; h < 16; h++) {
2060 		struct cbq_class *cl;
2061 
2062 		for (cl = q->classes[h]; cl; cl = cl->next) {
2063 			if (arg->count < arg->skip) {
2064 				arg->count++;
2065 				continue;
2066 			}
2067 			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2068 				arg->stop = 1;
2069 				return;
2070 			}
2071 			arg->count++;
2072 		}
2073 	}
2074 }
2075 
2076 static struct Qdisc_class_ops cbq_class_ops =
2077 {
2078 	cbq_graft,
2079 	cbq_leaf,
2080 	cbq_get,
2081 	cbq_put,
2082 	cbq_change_class,
2083 	cbq_delete,
2084 	cbq_walk,
2085 
2086 	cbq_find_tcf,
2087 	cbq_bind_filter,
2088 	cbq_unbind_filter,
2089 
2090 	cbq_dump_class,
2091 };
2092 
2093 struct Qdisc_ops cbq_qdisc_ops =
2094 {
2095 	NULL,
2096 	&cbq_class_ops,
2097 	"cbq",
2098 	sizeof(struct cbq_sched_data),
2099 
2100 	cbq_enqueue,
2101 	cbq_dequeue,
2102 	cbq_requeue,
2103 	cbq_drop,
2104 
2105 	cbq_init,
2106 	cbq_reset,
2107 	cbq_destroy,
2108 	NULL /* cbq_change */,
2109 
2110 	cbq_dump,
2111 };
2112 
2113 #ifdef MODULE
2114 int init_module(void)
2115 {
2116 	return register_qdisc(&cbq_qdisc_ops);
2117 }
2118 
2119 void cleanup_module(void)
2120 {
2121 	unregister_qdisc(&cbq_qdisc_ops);
2122 }
2123 #endif
2124 MODULE_LICENSE("GPL");
2125