/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
 *	====================================
 *
 *	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
 *		 Network Emulation Tool"
 *		 [2] Luigi Rizzo, DummyNet for FreeBSD
 *
 *	----------------------------------------------------------------
 *
 *	This started out as a simple way to delay outgoing packets to
 *	test TCP but has grown to include most of the functionality
 *	of a full blown network emulator like NISTnet. It can delay
 *	packets and add random jitter (and correlation). The random
 *	distribution can be loaded from a table as well to provide
 *	normal, Pareto, or experimental curves. Packet loss,
 *	duplication, and reordering can also be emulated.
 *
 *	This qdisc does not do classification that can be handled in
 *	layering other disciplines. It does not need to do bandwidth
 *	control either since that can be handled by using token
 *	bucket or other rate control.
 *
 *	Correlated Loss Generator models
 *
 *	Added generation of correlated loss according to the
 *	"Gilbert-Elliott" model, a 4-state Markov model.
 *
 *	References:
 *	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
 *	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
 *	and intuitive loss model for packet networks and its implementation
 *	in the Netem module in the Linux kernel", available in [1]
 *
 *	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
 *		 Fabio Ludovici <fabio.ludovici at yahoo.it>
 */
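
/* Example usage (illustrative; see the tc-netem(8) man page shipped with
 * iproute2 for the authoritative syntax):
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *	tc qdisc change dev eth0 root netem loss 0.3% 25%
 *	tc qdisc change dev eth0 root netem duplicate 1% corrupt 0.1%
 *
 * The first command delays every packet by 100ms +/- 10ms, with each
 * delay sample correlated 25% with the previous one; those values land
 * in q->latency, q->jitter and q->delay_cor below.
 */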

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u32 rate;
	s32 packet_overhead;
	u32 cell_size;
	u32 cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;

	enum  {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliott models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(skb->cb) <
		     sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}

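/* Worked example (illustrative, not from the original file): with
 * rho = 0x80000000 (roughly 50% correlation), rho + 1 is about 2^31 and
 * the expression above reduces to answer ~= (value + last) / 2, an even
 * blend of fresh randomness and the previous output, computed entirely
 * in 32.32 fixed point.
 */
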
/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = net_random();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 * 1 => successfully transmitted packets within a gap period
	 * 4 => isolated losses within a gap period
	 * 3 => lost packets within a burst period
	 * 2 => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case 1:
		if (rnd < clg->a4) {
			clg->state = 4;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1) {
			clg->state = 3;
			return true;
		} else if (clg->a1 < rnd)
			clg->state = 1;

		break;
	case 2:
		if (rnd < clg->a5) {
			clg->state = 3;
			return true;
		} else
			clg->state = 2;

		break;
	case 3:
		if (rnd < clg->a3)
			clg->state = 2;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = 1;
			return true;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = 3;
			return true;
		}
		break;
	case 4:
		clg->state = 1;
		break;
	}

	return false;
}

/* loss_gilb_ell - Gilbert-Elliott model loss generator
 * Generates losses according to the Gilbert-Elliott loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case 1:
		if (net_random() < clg->a1)
			clg->state = 2;
		if (net_random() < clg->a4)
			return true;
		break;	/* don't fall through into state 2's checks */
	case 2:
		if (net_random() < clg->a2)
			clg->state = 1;
		if (clg->a3 > net_random())
			return true;
	}

	return false;
}

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (used also for GI model)
		 * Extracts a value from the Markov 4-state loss generator;
		 * if it is 1, drops a packet and if needed writes the event
		 * in the kernel logs
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliott loss model algorithm
		 * Extracts a value from the Gilbert-Elliott loss generator;
		 * if it is 1, drops a packet and if needed writes the event
		 * in the kernel logs
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}

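/* Worked example (illustrative): in the uniform fallback above,
 * mu = 100ms and sigma = 10ms yield a value in [mu - sigma, mu + sigma),
 * i.e. 90..110ms.  In the table branch, t is a sample of the target
 * distribution scaled by NETEM_DIST_SCALE; splitting sigma into
 * quotient and remainder avoids overflowing sigma * t while rounding
 * the remainder term to the nearest unit.
 */
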
static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
	u64 ticks;

	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}

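/* Worked example (illustrative): q->rate is in bytes per second, so a
 * 1500-byte packet at q->rate = 125000 (1 Mbit/s) gives
 * ticks = 1500 * NSEC_PER_SEC / 125000 = 12,000,000 ns, i.e. 12 ms of
 * serialization delay before the conversion to scheduler ticks.
 */
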
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < sch->limit)) {
		skb = skb_peek_tail(list);
		/* Optimize for add at tail */
		if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
			return qdisc_enqueue_tail(nskb, sch);

		skb_queue_reverse_walk(list, skb) {
			if (tnext >= netem_skb_cb(skb)->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);
		sch->qstats.backlog += qdisc_pkt_len(nskb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_reshape_fail(nskb, sch);
}

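/* Design note: the reverse walk above is O(n) in queue length, but
 * time_to_send values mostly arrive in non-decreasing order, so the
 * peek-tail fast path covers the common case in O(1).  Later kernels
 * replaced this list with an rbtree to keep insertion cheap under heavy
 * jitter, where arrivals are far from sorted.
 */
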
/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q))
		--count;

	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are going to modify it.
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

		if (q->rate) {
			struct sk_buff_head *list = &sch->q;

			delay += packet_len_2_sched_time(skb->len, q);

			if (!skb_queue_empty(list)) {
				/*
				 * Last packet in queue is reference point (now).
				 * First packet in queue is already in flight,
				 * calculate this time bonus and subtract
				 * from delay.
				 */
				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
			}
		}

		cb->time_to_send = now + delay;
		++q->counter;
		ret = tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&sch->q, skb);
		sch->qstats.backlog += qdisc_pkt_len(skb);
		sch->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			return ret;
		}
	}

	return NET_XMIT_SUCCESS;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	len = qdisc_queue_drop(sch);
	if (!len && q->qdisc && q->qdisc->ops->drop)
		len = q->qdisc->ops->drop(q->qdisc);
	if (len)
		sch->qstats.drops++;

	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (qdisc_is_throttled(sch))
		return NULL;

tfifo_dequeue:
	skb = qdisc_peek_head(sch);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);

		/* if more time remaining? */
		if (cb->time_to_send <= psched_get_time()) {
			skb = qdisc_dequeue_tail(sch);
			if (unlikely(!skb))
				goto qdisc_dequeue;

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			if (q->qdisc) {
				int err = qdisc_enqueue(skb, q->qdisc);

				if (unlikely(err != NET_XMIT_SUCCESS)) {
					if (net_xmit_drop_count(err)) {
						sch->qstats.drops++;
						qdisc_tree_decrease_qlen(sch, 1);
					}
				}
				goto tfifo_dequeue;
			}
deliver:
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

qdisc_dequeue:
	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	if (d) {
		if (is_vmalloc_addr(d))
			vfree(d);
		else
			kfree(d);
	}
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}

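/* Note: the delay distribution tables themselves (normal, pareto,
 * paretonormal, ...) are generated in user space and shipped with
 * iproute2; tc reads the named table (conventionally from /usr/lib/tc,
 * though the path is an iproute2 convention, not something this module
 * checks) and passes it down in TCA_NETEM_DELAY_DIST.
 */
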
static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	q->cell_overhead = r->cell_overhead;
}

static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = 1;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = 1;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

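/* Note: unlike most qdiscs, netem's TCA_OPTIONS payload starts with a
 * fixed struct tc_netem_qopt (for compatibility with old userspace) and
 * only then carries ordinary netlink attributes; parse_attr() above
 * skips the NLA_ALIGNed struct before handing the rest to nla_parse().
 */
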
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	sch->limit = qopt->limit;

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(sch, tb[TCA_NETEM_RATE]);

	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	rate.rate = q->rate;
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc) 	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		       struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	if (*old) {
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");