/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/config.h>
#include <linux/module.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>

#include <net/pkt_sched.h>

#define qdisc_priv(q)	((void *)(q->data))

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		     Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	----------------------------------------------------------------

	This started out as a simple way to delay outgoing packets to
	test TCP but has grown to include most of the functionality
	of a full blown network emulator like NIST Net.  It can delay
	packets and add random jitter (and correlation).  The random
	distribution can also be loaded from a table, to provide
	normal, Pareto, or experimental curves.  Packet loss,
	duplication, and reordering can also be emulated.

	This qdisc does not do classification; that can be handled by
	layering other disciplines.  It does not need to do bandwidth
	control either, since that can be handled by using token
	bucket or other rate control.

	The emulator is limited by the Linux timer resolution
	and will create packet bursts on the HZ boundary (1ms).
*/
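
/* Example use (a sketch, assuming the iproute2 "tc" front end and an
 * "eth0" device; the numbers are illustrative, not recommendations):
 *
 *	# 100ms delay, 10ms jitter with 25% correlation, 0.1% loss
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25% loss 0.1%
 *
 * Rate limiting is deliberately left to a separate qdisc such as TBF.
 */
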
56
57 struct netem_sched_data {
58 struct Qdisc *qdisc;
59 struct sk_buff_head delayed;
60 struct timer_list timer;
61
62 u32 latency;
63 u32 loss;
64 u32 limit;
65 u32 counter;
66 u32 gap;
67 u32 jitter;
68 u32 duplicate;
69
70 struct crndstate {
71 unsigned long last;
72 unsigned long rho;
73 } delay_cor, loss_cor, dup_cor;
74
75 struct disttable {
76 u32 size;
77 s16 table[0];
78 } *delay_dist;
79 };
80
81 /* Time stamp put into socket buffer control block */
82 struct netem_skb_cb {
83 psched_time_t time_to_send;
84 };
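
/* (Note: skb->cb is private to the current owner of the skb, so this
 * struct must fit within sizeof(skb->cb) while the packet sits on the
 * delayed list; a constraint assumed, not enforced, here.)
 */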

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
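/* A sketch of the mixing step below (a reading of the code, not a
 * specification): with rho chosen so that rho/2^32 is the desired
 * correlation coefficient,
 *
 *	answer = ((2^32 - rho) * new_random + rho * last) >> 32
 *
 * so rho == 0 is fully uncorrelated and rho near 2^32 nearly
 * repeats the previous value.
 */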
static unsigned long get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
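/* How the fixed-point arithmetic below works (a sketch; NETEM_DIST_SCALE
 * comes from <linux/pkt_sched.h>): a table entry t is an inverse-CDF
 * sample scaled by NETEM_DIST_SCALE, so the result is approximately
 *
 *	mu + (t * sigma) / NETEM_DIST_SCALE
 *
 * Splitting sigma into quotient and remainder keeps the multiply from
 * overflowing, and the +/- NETEM_DIST_SCALE/2 term rounds to nearest.
 */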
static long tabledist(unsigned long mu, long sigma,
		      struct crndstate *state, const struct disttable *dist)
{
	long t, x;
	unsigned long rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}

/* Put skb in the private delayed queue. */
static int delay_skb(struct Qdisc *sch, struct sk_buff *skb)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
	psched_tdiff_t td;
	psched_time_t now;

	PSCHED_GET_TIME(now);
	td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
	PSCHED_TADD2(now, td, cb->time_to_send);

	/* Always queue at tail to keep packets in order */
	if (likely(q->delayed.qlen < q->limit)) {
		__skb_queue_tail(&q->delayed, skb);
		sch->stats.bytes += skb->len;
		sch->stats.packets++;

		if (!timer_pending(&q->timer)) {
			q->timer.expires = jiffies + PSCHED_US2JIFFIE(td);
			add_timer(&q->timer);
		}
		return NET_XMIT_SUCCESS;
	}

	sch->stats.drops++;
	kfree_skb(skb);
	return NET_XMIT_DROP;
}

static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	pr_debug("netem_enqueue skb=%p @%lu\n", skb, jiffies);

	/* Random packet drop 0 => none, ~0 => all */
	if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
		pr_debug("netem_enqueue: random loss\n");
		sch->stats.drops++;
		kfree_skb(skb);
		return 0;	/* lie about loss so TCP doesn't know */
	}

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) {
		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

		pr_debug("netem_enqueue: dup %p\n", skb2);
		if (skb2)
			delay_skb(sch, skb2);
	}

	/* If doing simple delay then gap == 0, so all packets
	 * go into the delayed holding queue; otherwise, when doing
	 * out-of-order emulation, only "1 out of gap" packets
	 * are delayed.
	 */
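	/* Illustration (a reading of the counter logic below, not a
	 * guarantee): with gap == 5, five packets in a row are handed
	 * straight to the inner qdisc and the next one is put on the
	 * delay queue, so it arrives after its successors.
	 */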
	if (q->counter < q->gap) {
		int ret;

		++q->counter;
		ret = q->qdisc->enqueue(skb, q->qdisc);
		if (likely(ret == NET_XMIT_SUCCESS)) {
			sch->q.qlen++;
			sch->stats.bytes += skb->len;
			sch->stats.packets++;
		} else
			sch->stats.drops++;
		return ret;
	}

	q->counter = 0;

	return delay_skb(sch, skb);
}

/* Requeue packets but don't change time stamp */
static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0)
		sch->q.qlen++;

	return ret;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->stats.drops++;
	}
	return len;
}

/* Dequeue packet.
 * Packets whose send time has arrived are moved from the delay
 * holding list to the inner qdisc by the watchdog timer; here we
 * just dequeue from the inner qdisc.
 */
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->dequeue(q->qdisc);
	if (skb)
		sch->q.qlen--;
	return skb;
}

static void netem_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct netem_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = sch->dev;
	struct sk_buff *skb;
	psched_time_t now;

	pr_debug("netem_watchdog: fired @%lu\n", jiffies);

	spin_lock_bh(&dev->queue_lock);
	PSCHED_GET_TIME(now);

	while ((skb = skb_peek(&q->delayed)) != NULL) {
		const struct netem_skb_cb *cb
			= (const struct netem_skb_cb *)skb->cb;
		long delay
			= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
		pr_debug("netem_watchdog: skb %p@%lu %ld\n",
			 skb, jiffies, delay);

		/* is there more time remaining? */
		if (delay > 0) {
			mod_timer(&q->timer, jiffies + delay);
			break;
		}
		__skb_unlink(skb, &q->delayed);

		if (q->qdisc->enqueue(skb, q->qdisc))
			sch->stats.drops++;
		else
			sch->q.qlen++;
	}
	qdisc_run(dev);
	spin_unlock_bh(&dev->queue_lock);
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	skb_queue_purge(&q->delayed);

	sch->q.qlen = 0;
	del_timer_sync(&q->timer);
}

static int set_fifo_limit(struct Qdisc *q, int limit)
{
	struct rtattr *rta;
	int ret = -ENOMEM;

	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
	if (rta) {
		rta->rta_type = RTM_NEWQDISC;
		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;

		ret = q->ops->change(q, rta);
		kfree(rta);
	}
	return ret;
}

/*
 * Distribution data is a variable-size payload containing
 * signed 16-bit values.
 */
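/* (The tables themselves come from user space; for example the
 * iproute2 "tc" utility ships precomputed inverse-CDF tables
 * (normal, pareto, etc.) that it passes down via this attribute.)
 */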
static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
	const __s16 *data = RTA_DATA(attr);
	struct disttable *d;
	int i;

	if (n > 65536)
		return -EINVAL;

	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	spin_lock_bh(&sch->dev->queue_lock);
	d = xchg(&q->delay_dist, d);
	spin_unlock_bh(&sch->dev->queue_lock);

	kfree(d);
	return 0;
}

static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = RTA_DATA(attr);

	if (RTA_PAYLOAD(attr) != sizeof(*c))
		return -EINVAL;

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
	return 0;
}

static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
		return -EINVAL;

	qopt = RTA_DATA(opt);
	ret = set_fifo_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_debug("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* Handle nested options after initial queue options.
	 * Should have put all options in nested format but too late now.
	 */
	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
		struct rtattr *tb[TCA_NETEM_MAX];
		if (rtattr_parse(tb, TCA_NETEM_MAX,
				 RTA_DATA(opt) + sizeof(*qopt),
				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
			return -EINVAL;

		if (tb[TCA_NETEM_CORR-1]) {
			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
			if (ret)
				return ret;
		}

		if (tb[TCA_NETEM_DELAY_DIST-1]) {
			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
			if (ret)
				return ret;
		}
	}

	return 0;
}

static int netem_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	MOD_INC_USE_COUNT;
	skb_queue_head_init(&q->delayed);
	init_timer(&q->timer);
	q->timer.function = netem_watchdog;
	q->timer.data = (unsigned long) sch;
	q->counter = 0;

	q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
	if (!q->qdisc) {
		pr_debug("netem: qdisc create failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		pr_debug("netem: change failed\n");
		qdisc_destroy(q->qdisc);
		MOD_DEC_USE_COUNT;
	}
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->timer);
	qdisc_destroy(q->qdisc);
	kfree(q->delay_dist);
	MOD_DEC_USE_COUNT;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;
	struct rtattr *rta = (struct rtattr *) b;
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
	rta->rta_len = skb->tail - b;

	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1) 	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		       struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = xchg(&q->qdisc, new);
	qdisc_reset(*old);
	sch->q.qlen = 0;
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			      struct rtattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

static int netem_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}

static struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.change		=	netem_change_class,
	.delete		=	netem_delete,
	.walk		=	netem_walk,
	.tcf_chain	=	netem_find_tcf,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.requeue	=	netem_requeue,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
};


static int __init netem_module_init(void)
{
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");