]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/sched/sch_cbq.c
net_sched: Add qdisc_enqueue wrapper
[mirror_ubuntu-jammy-kernel.git] / net / sched / sch_cbq.c
CommitLineData
1da177e4
LT
1/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
1da177e4 13#include <linux/module.h>
1da177e4
LT
14#include <linux/types.h>
15#include <linux/kernel.h>
1da177e4 16#include <linux/string.h>
1da177e4 17#include <linux/errno.h>
1da177e4 18#include <linux/skbuff.h>
0ba48053 19#include <net/netlink.h>
1da177e4
LT
20#include <net/pkt_sched.h>
21
22
23/* Class-Based Queueing (CBQ) algorithm.
24 =======================================
25
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
10297b99 27 Management Models for Packet Networks",
1da177e4
LT
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
29
10297b99 30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
1da177e4 31
10297b99 32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
1da177e4
LT
33 Parameters", 1996
34
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
37
38 -----------------------------------------------------------------------
39
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed,
43 the implementation is different. Particularly:
44
45 --- The WRR algorithm is different. Our version looks more
10297b99
YH
46 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real time classes
48 have small rates. Note, that the statement of [3] is
49 incomplete, delay may actually be estimated even if class
50 per-round allotment is less than MTU. Namely, if per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
1da177e4
LT
52
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
54
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
57
58
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me, why I am wrong 8).
63
64 --- Linux has no EOI event, so that we cannot estimate true class
65 idle time. Workaround is to consider the next dequeue event
66 as sign that previous packet is finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with clock integrator, this scheme looks
69 very close to an ideal solution. */
70
71struct cbq_sched_data;
72
73
/* Per-class state for the CBQ qdisc.  One instance exists for the root
 * ("link") pseudo-class and for every class configured via tc. */
struct cbq_class
{
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;	/* TC_CBQ_OVL_* overlimit action selector */
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;		/* bitmap of TC_PRIO_* values this class is default for */

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;	/* extra sleep added when class goes overlimit */
	long			minidle;	/* lower clamp for avgidle (negative or 0) */
	u32			avpkt;		/* average packet size, used for quanta sanity */
	struct qdisc_rate_table	*R_tab;		/* rate table: len -> transmission time */

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);	/* selected TC_CBQ_OVL_* handler */
	psched_tdiff_t		penalty;	/* extra delay for OVL_DELAY/OVL_LOWPRIO */

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;	/* non-zero while class is sleeping after overlimit */
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;	/* time the class becomes underlimit again;
						   PSCHED_PASTPERFECT == "already underlimit" */
	long			avgidle;	/* scaled EWMA of idle time (see cbq_update) */
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;	/* when a penalized class regains its priority */
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;	/* classifier chain attached to this class */

	int			refcnt;
	int			filters;	/* number of filters referencing this class */

	struct cbq_class	*defaults[TC_PRIO_MAX+1];	/* per-logical-priority default classes */
};
143
/* Qdisc-wide CBQ state, stored in qdisc_priv() of the root Qdisc. */
struct cbq_sched_data
{
	struct Qdisc_class_hash clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];	/* class count per priority band */
	unsigned		quanta[TC_CBQ_MAXPRIO+1];	/* sum of weights per band (for quantum normalization) */

	struct cbq_class	link;			/* built-in root class */

	unsigned		activemask;		/* bitmap of priority bands with backlog */
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
							   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;		/* class resolved for packet being enqueued */
#endif
	struct cbq_class	*tx_class;		/* class that sent the last packet */
	struct cbq_class	*tx_borrowed;		/* class it borrowed bandwidth from */
	int			tx_len;			/* length of the last transmitted packet */
	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */
	unsigned		pmask;			/* bitmap of bands with penalized classes */

	struct hrtimer		delay_timer;		/* wakes penalized classes (cbq_undelay) */
	struct qdisc_watchdog	watchdog;		/* Watchdog timer,
							   started when CBQ has
							   backlog, but cannot
							   transmit just now */
	psched_tdiff_t		wd_expires;		/* pending watchdog delay computed in dequeue */
	int			toplevel;		/* top-level sharing cutoff (TC_CBQ_MAXLEVEL = off) */
	u32			hgenerator;		/* generator for automatic class ids */
};
175
176
/* L2T: link-to-time — transmission time of a packet of the given length
 * at the class's configured rate, via its rate table. */
#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)


/* Find the class with the given classid in the qdisc's class hash,
 * or return NULL if no such class exists. */
static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	/* hash stores the embedded common member; recover the class */
	return container_of(clc, struct cbq_class, common);
}
189
c3bc7cff 190#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
191
192static struct cbq_class *
193cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
194{
195 struct cbq_class *cl, *new;
196
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
199 return new;
200
201 return NULL;
202}
203
204#endif
205
206/* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
208 transparently.
209
210 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
213 to the split node.
214 */
215
/* Classify @skb into a leaf class.  Returns the chosen class, or NULL
 * when an action consumed/dropped the packet (*qerr then tells the
 * caller how to account the result). */
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			/* Classifier returned an id, not a class pointer:
			 * resolve it either as a classid or via defmap. */
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
			/* fall through: packet is gone either way */
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	   apply agency specific classifier.
		 *	   Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
290
291/*
292 A packet has just been enqueued on the empty class.
293 cbq_activate_class adds it to the tail of active class list
294 of its priority band.
295 */
296
/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of active class list
   of its priority band.

   The active list is a circular singly-linked list threaded through
   ->next_alive; q->active[prio] points at its TAIL element.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;	/* new class becomes the tail */

	if (cl_tail != NULL) {
		/* splice after the old tail (i.e. before the head) */
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		/* first backlogged class in this band: self-loop and
		 * mark the band active */
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
314
315/*
316 Unlink class from active chain.
317 Note that this same procedure is done directly in cbq_dequeue*
318 during round-robin procedure.
319 */
320
/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	/* walk the circular list looking for the node before @this */
	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				/* removed the tail: predecessor becomes tail */
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					/* it was the only member: band empty */
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
346
/* Lower the top-level borrowing cutoff when an unthrottled class (or
 * one of its underlimit ancestors) becomes busy, so that borrowing is
 * permitted down to that level.  Uses the same virtual-clock warp as
 * cbq_dequeue (q->now advanced by real time elapsed since now_rt). */
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			/* first underlimit class on the borrow chain
			 * defines the new cutoff level */
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}
368
/* Qdisc ->enqueue(): classify the packet, hand it to the leaf class's
 * inner qdisc, and update stats/active lists.  Returns a NET_XMIT_*
 * code; on failure the skb has been freed (here or by the inner qdisc). */
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int len = skb->len;
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	/* remembered so cbq_reshape_fail can reclassify on inner drop */
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		/* only a classification miss counts as our drop;
		 * action verdicts were already accounted via *qerr */
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes+=len;
		cbq_mark_toplevel(q, cl);
		/* class had no backlog before: link it into its band */
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	sch->qstats.drops++;
	cbq_mark_toplevel(q, cl);
	cl->qstats.drops++;
	return ret;
}
406
/* Qdisc ->requeue(): put back the most recently dequeued packet.  The
 * owning class is recovered from q->tx_class (set by cbq_dequeue_prio);
 * if that was already consumed there is nowhere to put the skb. */
static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		/* no record of the last tx class: must drop */
		kfree_skb(skb);
		sch->qstats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->qstats.drops++;
	cl->qstats.drops++;
	return ret;
}
438
439/* Overlimit actions */
440
441/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
442
/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a crap in this
		   place, apparently they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	/* remember the earliest wakeup for the qdisc watchdog */
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		/* take the minimum positive delay over the borrow chain */
		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
492
493/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
494 they go overlimit
495 */
496
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   they go overlimit.  Walk the borrow chain up to the toplevel cutoff
   and apply the classic penalty to the deepest usable ancestor (or to
   the original class when the whole chain is above the cutoff). */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			/* chain climbed past the cutoff: give up */
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}
513
514/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
515
/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		/* same avgidle forgiveness as cbq_ovl_classic */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			/* park the class on the MAXPRIO "penalty band"
			 * until cbq_undelay restores it */
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
			/* pull an already-armed timer earlier if needed,
			 * then (re)start it */
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(q->delay_timer.expires,
						  expires)) > 0)
				q->delay_timer.expires = expires;
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
554
555/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
556
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	/* time at which cbq_undelay restores the normal priority */
	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	/* still apply the classic offtime penalty on top */
	cbq_ovl_classic(cl);
}
570
571/* TC_CBQ_OVL_DROP: penalize class by dropping */
572
573static void cbq_ovl_drop(struct cbq_class *cl)
574{
575 if (cl->q->ops->drop)
576 if (cl->q->ops->drop(cl->q))
577 cl->qdisc->q.qlen--;
578 cl->xstats.overactions++;
579 cbq_ovl_classic(cl);
580}
581
1a13cb63
PM
/* Reactivate classes in band @prio whose penalty expired at @now.
 * Returns 0 when nothing remains penalized, otherwise the remaining
 * time until the earliest pending expiry. */
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			/* penalty over: unlink from the penalty band and
			 * re-activate at the class's normal priority */
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					/* band is now empty */
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			/* track the earliest future expiry */
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}
616
/* hrtimer callback: sweep all penalty bands, reactivate classes whose
 * penalty expired, rearm the timer for the earliest remaining expiry,
 * and kick the qdisc so dequeue runs again. */
static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	/* consume the pending mask; bands still penalized are re-added */
	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	__netif_schedule(sch);
	return HRTIMER_NORESTART;
}
657
c3bc7cff 658#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
/* ->reshape_fail hook of inner qdiscs: the child refused the packet, so
 * try to reclassify it into an ancestor's default class and enqueue it
 * there instead.  Returns 0 if the skb was taken care of (enqueued or
 * accounted as a drop by us), -1 to let the caller free it. */
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int len = skb->len;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (qdisc_enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes+=len;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		/* fallback class dropped it too; skb already consumed */
		sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
690#endif
691
10297b99 692/*
1da177e4
LT
693 It is mission critical procedure.
694
695 We "regenerate" toplevel cutoff, if transmitting class
696 has backlog and it is not regulated. It is not part of
697 original CBQ description, but looks more reasonable.
698 Probably, it is wrong. This question needs further investigation.
699*/
700
/*
   It is mission critical procedure.

   We "regenerate" toplevel cutoff, if transmitting class
   has backlog and it is not regulated. It is not part of
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
*/

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				/* an underlimit ancestor on the borrow
				 * chain lowers the cutoff to its level */
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}
722
/* Account the just-transmitted packet (q->tx_class / q->tx_len) against
 * the transmitting class and all its link-sharing ancestors: update the
 * scaled avgidle EWMA and recompute each class's undertime. */
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			/* class was idle for a very long time: reset */
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			   will be allowed to send.
			   It will occur, when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime virtual clock,
			   necessary to complete transmitted packet.
			   (len/phys_bandwidth has been already passed
			   to the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
805
/* Decide whether @cl may transmit now.  Returns @cl itself when it is
 * underlimit, the ancestor it may borrow from when it is overlimit but
 * borrowing is allowed, or NULL (after firing the overlimit action)
 * when it must wait. */
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	/* the root class is never limited */
	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* It is very suspicious place. Now overlimit
		   action is generated for not bounded classes
		   only if link is completely congested.
		   Though it is in agree with ancestor-only paradigm,
		   it looks very stupid. Particularly,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and, however,
		   no another solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}
843
/* One weighted-round-robin pass over the active classes of band @prio.
 * Returns the dequeued skb, or NULL when no class in the band could
 * transmit (overlimit/empty); also maintains the circular active list
 * inline (cf. cbq_deactivate_class). */
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= skb->len;
			/* record tx state for cbq_update / cbq_requeue */
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
#endif
			}
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				/* allotment used up: rotate so the next
				 * class starts the following call */
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				/* penalized but non-empty: re-activate on
				 * its (possibly different) penalty band */
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
946
947static __inline__ struct sk_buff *
948cbq_dequeue_1(struct Qdisc *sch)
949{
950 struct cbq_sched_data *q = qdisc_priv(sch);
951 struct sk_buff *skb;
952 unsigned activemask;
953
954 activemask = q->activemask&0xFF;
955 while (activemask) {
956 int prio = ffz(~activemask);
957 activemask &= ~(1<<prio);
958 skb = cbq_dequeue_prio(sch, prio);
959 if (skb)
960 return skb;
961 }
962 return NULL;
963}
964
/* Qdisc ->dequeue(): advance the virtual clock, account the previously
 * transmitted packet, then run WRR dequeue; retries once with the
 * toplevel cutoff reset before giving up and arming the watchdog. */
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding expected packet transmission time.
		   If real time is greater, we warp artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible, if:

		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset 2d and 3d conditions and retry.

		   Note, that NS and cbq-2.0 are buggy, peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for toplevel algorithm.

		   Our version is better, but slower, because it requires
		   two passes, but it is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}
1041
1042/* CBQ class maintanance routines */
1043
1044static void cbq_adjust_levels(struct cbq_class *this)
1045{
1046 if (this == NULL)
1047 return;
1048
1049 do {
1050 int level = 0;
1051 struct cbq_class *cl;
1052
1053 if ((cl = this->children) != NULL) {
1054 do {
1055 if (cl->level > level)
1056 level = cl->level;
1057 } while ((cl = cl->sibling) != this->children);
1058 }
1059 this->level = level+1;
1060 } while ((this = this->tparent) != NULL);
1061}
1062
/* Recompute the WRR quantum of every class in band @prio from its
 * weight/allot relative to the band's total, clamping obviously bogus
 * results to a device-MTU-based default. */
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffer of
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
1088
/* Rebuild the split node's defaults[] table after @cl's defmap changed:
 * drop stale entries pointing at @cl, then refill every empty slot with
 * the lowest-level class under the same split that claims that
 * priority. */
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	/* remove entries @cl no longer claims */
	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		/* scan all classes for the best (lowest-level) candidate */
		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
1125
/* Attach @cl to split node @splitid (0 = keep current split) and merge
 * @def under @mask into its defmap, then resync the split's defaults. */
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->common.classid;
	}

	/* resolve splitid among the class's tree ancestors */
	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		/* moving to a new split: first clear the old one's
		 * references to us, then install the new mapping */
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}
1155
/* Remove @this from the class hash and from its parent's circular
 * sibling ring (inverse of cbq_link_class). */
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		/* walk the sibling ring to splice @this out */
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				/* @this was the only child */
				this->tparent->children = NULL;
		}
	} else {
		/* root-level classes must be self-looped */
		BUG_TRAP(this->sibling == this);
	}
}
1183
1184static void cbq_link_class(struct cbq_class *this)
1185{
1186 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1da177e4
LT
1187 struct cbq_class *parent = this->tparent;
1188
1189 this->sibling = this;
d77fea2e 1190 qdisc_class_hash_insert(&q->clhash, &this->common);
1da177e4
LT
1191
1192 if (parent == NULL)
1193 return;
1194
1195 if (parent->children == NULL) {
1196 parent->children = this;
1197 } else {
1198 this->sibling = parent->children->sibling;
1199 parent->children->sibling = this;
1200 }
1201}
1202
1203static unsigned int cbq_drop(struct Qdisc* sch)
1204{
1205 struct cbq_sched_data *q = qdisc_priv(sch);
1206 struct cbq_class *cl, *cl_head;
1207 int prio;
1208 unsigned int len;
1209
1210 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1211 if ((cl_head = q->active[prio]) == NULL)
1212 continue;
1213
1214 cl = cl_head;
1215 do {
1216 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1217 sch->q.qlen--;
a37ef2e3
JP
1218 if (!cl->q->q.qlen)
1219 cbq_deactivate_class(cl);
1da177e4
LT
1220 return len;
1221 }
1222 } while ((cl = cl->next_alive) != cl_head);
1223 }
1224 return 0;
1225}
1226
1227static void
1228cbq_reset(struct Qdisc* sch)
1229{
1230 struct cbq_sched_data *q = qdisc_priv(sch);
1231 struct cbq_class *cl;
d77fea2e 1232 struct hlist_node *n;
1da177e4
LT
1233 int prio;
1234 unsigned h;
1235
1236 q->activemask = 0;
1237 q->pmask = 0;
1238 q->tx_class = NULL;
1239 q->tx_borrowed = NULL;
88a99354 1240 qdisc_watchdog_cancel(&q->watchdog);
1a13cb63 1241 hrtimer_cancel(&q->delay_timer);
1da177e4 1242 q->toplevel = TC_CBQ_MAXLEVEL;
3bebcda2 1243 q->now = psched_get_time();
1da177e4
LT
1244 q->now_rt = q->now;
1245
1246 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1247 q->active[prio] = NULL;
1248
d77fea2e
PM
1249 for (h = 0; h < q->clhash.hashsize; h++) {
1250 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1da177e4
LT
1251 qdisc_reset(cl->q);
1252
1253 cl->next_alive = NULL;
a084980d 1254 cl->undertime = PSCHED_PASTPERFECT;
1da177e4
LT
1255 cl->avgidle = cl->maxidle;
1256 cl->deficit = cl->quantum;
1257 cl->cpriority = cl->priority;
1258 }
1259 }
1260 sch->q.qlen = 0;
1261}
1262
1263
1264static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1265{
1266 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1267 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1268 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1269 }
1270 if (lss->change&TCF_CBQ_LSS_EWMA)
1271 cl->ewma_log = lss->ewma_log;
1272 if (lss->change&TCF_CBQ_LSS_AVPKT)
1273 cl->avpkt = lss->avpkt;
1274 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1275 cl->minidle = -(long)lss->minidle;
1276 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1277 cl->maxidle = lss->maxidle;
1278 cl->avgidle = lss->maxidle;
1279 }
1280 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1281 cl->offtime = lss->offtime;
1282 return 0;
1283}
1284
1285static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1286{
1287 q->nclasses[cl->priority]--;
1288 q->quanta[cl->priority] -= cl->weight;
1289 cbq_normalize_quanta(q, cl->priority);
1290}
1291
1292static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1293{
1294 q->nclasses[cl->priority]++;
1295 q->quanta[cl->priority] += cl->weight;
1296 cbq_normalize_quanta(q, cl->priority);
1297}
1298
1299static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1300{
1301 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1302
1303 if (wrr->allot)
1304 cl->allot = wrr->allot;
1305 if (wrr->weight)
1306 cl->weight = wrr->weight;
1307 if (wrr->priority) {
1308 cl->priority = wrr->priority-1;
1309 cl->cpriority = cl->priority;
1310 if (cl->priority >= cl->priority2)
1311 cl->priority2 = TC_CBQ_MAXPRIO-1;
1312 }
1313
1314 cbq_addprio(q, cl);
1315 return 0;
1316}
1317
1318static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1319{
1320 switch (ovl->strategy) {
1321 case TC_CBQ_OVL_CLASSIC:
1322 cl->overlimit = cbq_ovl_classic;
1323 break;
1324 case TC_CBQ_OVL_DELAY:
1325 cl->overlimit = cbq_ovl_delay;
1326 break;
1327 case TC_CBQ_OVL_LOWPRIO:
1328 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1329 ovl->priority2-1 <= cl->priority)
1330 return -EINVAL;
1331 cl->priority2 = ovl->priority2-1;
1332 cl->overlimit = cbq_ovl_lowprio;
1333 break;
1334 case TC_CBQ_OVL_DROP:
1335 cl->overlimit = cbq_ovl_drop;
1336 break;
1337 case TC_CBQ_OVL_RCLASSIC:
1338 cl->overlimit = cbq_ovl_rclassic;
1339 break;
1340 default:
1341 return -EINVAL;
1342 }
1a13cb63 1343 cl->penalty = ovl->penalty;
1da177e4
LT
1344 return 0;
1345}
1346
#ifdef CONFIG_NET_CLS_ACT
/* Record the policing mode and, for RECLASSIFY, install the reshape_fail
 * hook on the class's child qdisc so dropped packets can be reclassified.
 * Always returns 0.
 */
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif
1361
1362static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1363{
1364 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1365 return 0;
1366}
1367
27a3421e
PM
1368static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1369 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1370 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1371 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1372 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1373 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1374 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1375 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1376};
1377
1e90474c 1378static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1da177e4
LT
1379{
1380 struct cbq_sched_data *q = qdisc_priv(sch);
1e90474c 1381 struct nlattr *tb[TCA_CBQ_MAX + 1];
1da177e4 1382 struct tc_ratespec *r;
cee63723
PM
1383 int err;
1384
27a3421e 1385 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
cee63723
PM
1386 if (err < 0)
1387 return err;
1da177e4 1388
27a3421e 1389 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1da177e4
LT
1390 return -EINVAL;
1391
1e90474c 1392 r = nla_data(tb[TCA_CBQ_RATE]);
1da177e4 1393
1e90474c 1394 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1da177e4
LT
1395 return -EINVAL;
1396
d77fea2e
PM
1397 err = qdisc_class_hash_init(&q->clhash);
1398 if (err < 0)
1399 goto put_rtab;
1400
1da177e4
LT
1401 q->link.refcnt = 1;
1402 q->link.sibling = &q->link;
d77fea2e 1403 q->link.common.classid = sch->handle;
1da177e4 1404 q->link.qdisc = sch;
5ce2d488 1405 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
bb949fbd 1406 &pfifo_qdisc_ops,
9f9afec4 1407 sch->handle)))
1da177e4
LT
1408 q->link.q = &noop_qdisc;
1409
1410 q->link.priority = TC_CBQ_MAXPRIO-1;
1411 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1412 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1413 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1414 q->link.overlimit = cbq_ovl_classic;
5ce2d488 1415 q->link.allot = psched_mtu(qdisc_dev(sch));
1da177e4
LT
1416 q->link.quantum = q->link.allot;
1417 q->link.weight = q->link.R_tab->rate.rate;
1418
1419 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1420 q->link.avpkt = q->link.allot/2;
1421 q->link.minidle = -0x7FFFFFFF;
1da177e4 1422
88a99354 1423 qdisc_watchdog_init(&q->watchdog, sch);
1a13cb63 1424 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1da177e4
LT
1425 q->delay_timer.function = cbq_undelay;
1426 q->toplevel = TC_CBQ_MAXLEVEL;
3bebcda2 1427 q->now = psched_get_time();
1da177e4
LT
1428 q->now_rt = q->now;
1429
1430 cbq_link_class(&q->link);
1431
1e90474c
PM
1432 if (tb[TCA_CBQ_LSSOPT])
1433 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1da177e4
LT
1434
1435 cbq_addprio(q, &q->link);
1436 return 0;
d77fea2e
PM
1437
1438put_rtab:
1439 qdisc_put_rtab(q->link.R_tab);
1440 return err;
1da177e4
LT
1441}
1442
1443static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1444{
27a884dc 1445 unsigned char *b = skb_tail_pointer(skb);
1da177e4 1446
1e90474c 1447 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1da177e4
LT
1448 return skb->len;
1449
1e90474c 1450nla_put_failure:
dc5fc579 1451 nlmsg_trim(skb, b);
1da177e4
LT
1452 return -1;
1453}
1454
1455static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1456{
27a884dc 1457 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1458 struct tc_cbq_lssopt opt;
1459
1460 opt.flags = 0;
1461 if (cl->borrow == NULL)
1462 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1463 if (cl->share == NULL)
1464 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1465 opt.ewma_log = cl->ewma_log;
1466 opt.level = cl->level;
1467 opt.avpkt = cl->avpkt;
1468 opt.maxidle = cl->maxidle;
1469 opt.minidle = (u32)(-cl->minidle);
1470 opt.offtime = cl->offtime;
1471 opt.change = ~0;
1e90474c 1472 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1da177e4
LT
1473 return skb->len;
1474
1e90474c 1475nla_put_failure:
dc5fc579 1476 nlmsg_trim(skb, b);
1da177e4
LT
1477 return -1;
1478}
1479
1480static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1481{
27a884dc 1482 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1483 struct tc_cbq_wrropt opt;
1484
1485 opt.flags = 0;
1486 opt.allot = cl->allot;
1487 opt.priority = cl->priority+1;
1488 opt.cpriority = cl->cpriority+1;
1489 opt.weight = cl->weight;
1e90474c 1490 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1da177e4
LT
1491 return skb->len;
1492
1e90474c 1493nla_put_failure:
dc5fc579 1494 nlmsg_trim(skb, b);
1da177e4
LT
1495 return -1;
1496}
1497
1498static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1499{
27a884dc 1500 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1501 struct tc_cbq_ovl opt;
1502
1503 opt.strategy = cl->ovl_strategy;
1504 opt.priority2 = cl->priority2+1;
8a47077a 1505 opt.pad = 0;
1a13cb63 1506 opt.penalty = cl->penalty;
1e90474c 1507 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1da177e4
LT
1508 return skb->len;
1509
1e90474c 1510nla_put_failure:
dc5fc579 1511 nlmsg_trim(skb, b);
1da177e4
LT
1512 return -1;
1513}
1514
1515static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1516{
27a884dc 1517 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1518 struct tc_cbq_fopt opt;
1519
1520 if (cl->split || cl->defmap) {
d77fea2e 1521 opt.split = cl->split ? cl->split->common.classid : 0;
1da177e4
LT
1522 opt.defmap = cl->defmap;
1523 opt.defchange = ~0;
1e90474c 1524 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1da177e4
LT
1525 }
1526 return skb->len;
1527
1e90474c 1528nla_put_failure:
dc5fc579 1529 nlmsg_trim(skb, b);
1da177e4
LT
1530 return -1;
1531}
1532
#ifdef CONFIG_NET_CLS_ACT
/* Dump the policing mode as TCA_CBQ_POLICE when one is set.  Reserved
 * fields are explicitly zeroed before the copy.  Returns skb->len or -1.
 */
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
1552
/* Emit the full attribute set for one class; any helper failing aborts the
 * dump.  Returns 0 on success, -1 on failure.
 */
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
1566
1567static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1568{
1569 struct cbq_sched_data *q = qdisc_priv(sch);
4b3550ef 1570 struct nlattr *nest;
1da177e4 1571
4b3550ef
PM
1572 nest = nla_nest_start(skb, TCA_OPTIONS);
1573 if (nest == NULL)
1574 goto nla_put_failure;
1da177e4 1575 if (cbq_dump_attr(skb, &q->link) < 0)
1e90474c 1576 goto nla_put_failure;
4b3550ef 1577 nla_nest_end(skb, nest);
1da177e4
LT
1578 return skb->len;
1579
1e90474c 1580nla_put_failure:
4b3550ef 1581 nla_nest_cancel(skb, nest);
1da177e4
LT
1582 return -1;
1583}
1584
1585static int
1586cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1587{
1588 struct cbq_sched_data *q = qdisc_priv(sch);
1589
1590 q->link.xstats.avgidle = q->link.avgidle;
1591 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1592}
1593
1594static int
1595cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1596 struct sk_buff *skb, struct tcmsg *tcm)
1597{
1598 struct cbq_class *cl = (struct cbq_class*)arg;
4b3550ef 1599 struct nlattr *nest;
1da177e4
LT
1600
1601 if (cl->tparent)
d77fea2e 1602 tcm->tcm_parent = cl->tparent->common.classid;
1da177e4
LT
1603 else
1604 tcm->tcm_parent = TC_H_ROOT;
d77fea2e 1605 tcm->tcm_handle = cl->common.classid;
1da177e4
LT
1606 tcm->tcm_info = cl->q->handle;
1607
4b3550ef
PM
1608 nest = nla_nest_start(skb, TCA_OPTIONS);
1609 if (nest == NULL)
1610 goto nla_put_failure;
1da177e4 1611 if (cbq_dump_attr(skb, cl) < 0)
1e90474c 1612 goto nla_put_failure;
4b3550ef 1613 nla_nest_end(skb, nest);
1da177e4
LT
1614 return skb->len;
1615
1e90474c 1616nla_put_failure:
4b3550ef 1617 nla_nest_cancel(skb, nest);
1da177e4
LT
1618 return -1;
1619}
1620
1621static int
1622cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1623 struct gnet_dump *d)
1624{
1625 struct cbq_sched_data *q = qdisc_priv(sch);
1626 struct cbq_class *cl = (struct cbq_class*)arg;
1627
1628 cl->qstats.qlen = cl->q->q.qlen;
1629 cl->xstats.avgidle = cl->avgidle;
1630 cl->xstats.undertime = 0;
1631
a084980d 1632 if (cl->undertime != PSCHED_PASTPERFECT)
8edc0c31 1633 cl->xstats.undertime = cl->undertime - q->now;
1da177e4
LT
1634
1635 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1da177e4 1636 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1da177e4
LT
1637 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1638 return -1;
1639
1640 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1641}
1642
1643static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1644 struct Qdisc **old)
1645{
1646 struct cbq_class *cl = (struct cbq_class*)arg;
1647
1648 if (cl) {
1649 if (new == NULL) {
5ce2d488 1650 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
bb949fbd 1651 &pfifo_qdisc_ops,
d77fea2e
PM
1652 cl->common.classid);
1653 if (new == NULL)
1da177e4
LT
1654 return -ENOBUFS;
1655 } else {
c3bc7cff 1656#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1657 if (cl->police == TC_POLICE_RECLASSIFY)
1658 new->reshape_fail = cbq_reshape_fail;
1659#endif
1660 }
1661 sch_tree_lock(sch);
a37ef2e3 1662 *old = xchg(&cl->q, new);
5e50da01 1663 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1da177e4
LT
1664 qdisc_reset(*old);
1665 sch_tree_unlock(sch);
1666
1667 return 0;
1668 }
1669 return -ENOENT;
1670}
1671
1672static struct Qdisc *
1673cbq_leaf(struct Qdisc *sch, unsigned long arg)
1674{
1675 struct cbq_class *cl = (struct cbq_class*)arg;
1676
1677 return cl ? cl->q : NULL;
1678}
1679
a37ef2e3
JP
1680static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1681{
1682 struct cbq_class *cl = (struct cbq_class *)arg;
1683
1684 if (cl->q->q.qlen == 0)
1685 cbq_deactivate_class(cl);
1686}
1687
1da177e4
LT
1688static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1689{
1690 struct cbq_sched_data *q = qdisc_priv(sch);
1691 struct cbq_class *cl = cbq_class_lookup(q, classid);
1692
1693 if (cl) {
1694 cl->refcnt++;
1695 return (unsigned long)cl;
1696 }
1697 return 0;
1698}
1699
1da177e4
LT
1700static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1701{
1702 struct cbq_sched_data *q = qdisc_priv(sch);
1703
1704 BUG_TRAP(!cl->filters);
1705
ff31ab56 1706 tcf_destroy_chain(&cl->filter_list);
1da177e4
LT
1707 qdisc_destroy(cl->q);
1708 qdisc_put_rtab(cl->R_tab);
1da177e4 1709 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1da177e4
LT
1710 if (cl != &q->link)
1711 kfree(cl);
1712}
1713
1714static void
1715cbq_destroy(struct Qdisc* sch)
1716{
1717 struct cbq_sched_data *q = qdisc_priv(sch);
d77fea2e 1718 struct hlist_node *n, *next;
1da177e4
LT
1719 struct cbq_class *cl;
1720 unsigned h;
1721
c3bc7cff 1722#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1723 q->rx_class = NULL;
1724#endif
1725 /*
1726 * Filters must be destroyed first because we don't destroy the
1727 * classes from root to leafs which means that filters can still
1728 * be bound to classes which have been destroyed already. --TGR '04
1729 */
d77fea2e
PM
1730 for (h = 0; h < q->clhash.hashsize; h++) {
1731 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
ff31ab56 1732 tcf_destroy_chain(&cl->filter_list);
b00b4bf9 1733 }
d77fea2e
PM
1734 for (h = 0; h < q->clhash.hashsize; h++) {
1735 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1736 common.hnode)
1da177e4 1737 cbq_destroy_class(sch, cl);
1da177e4 1738 }
d77fea2e 1739 qdisc_class_hash_destroy(&q->clhash);
1da177e4
LT
1740}
1741
1742static void cbq_put(struct Qdisc *sch, unsigned long arg)
1743{
1744 struct cbq_class *cl = (struct cbq_class*)arg;
1745
1746 if (--cl->refcnt == 0) {
c3bc7cff 1747#ifdef CONFIG_NET_CLS_ACT
7698b4fc 1748 spinlock_t *root_lock = qdisc_root_lock(sch);
1da177e4
LT
1749 struct cbq_sched_data *q = qdisc_priv(sch);
1750
7698b4fc 1751 spin_lock_bh(root_lock);
1da177e4
LT
1752 if (q->rx_class == cl)
1753 q->rx_class = NULL;
7698b4fc 1754 spin_unlock_bh(root_lock);
1da177e4
LT
1755#endif
1756
1757 cbq_destroy_class(sch, cl);
1758 }
1759}
1760
1761static int
1e90474c 1762cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1da177e4
LT
1763 unsigned long *arg)
1764{
1765 int err;
1766 struct cbq_sched_data *q = qdisc_priv(sch);
1767 struct cbq_class *cl = (struct cbq_class*)*arg;
1e90474c
PM
1768 struct nlattr *opt = tca[TCA_OPTIONS];
1769 struct nlattr *tb[TCA_CBQ_MAX + 1];
1da177e4
LT
1770 struct cbq_class *parent;
1771 struct qdisc_rate_table *rtab = NULL;
1772
cee63723 1773 if (opt == NULL)
1da177e4
LT
1774 return -EINVAL;
1775
27a3421e 1776 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
cee63723
PM
1777 if (err < 0)
1778 return err;
1779
1da177e4
LT
1780 if (cl) {
1781 /* Check parent */
1782 if (parentid) {
d77fea2e
PM
1783 if (cl->tparent &&
1784 cl->tparent->common.classid != parentid)
1da177e4
LT
1785 return -EINVAL;
1786 if (!cl->tparent && parentid != TC_H_ROOT)
1787 return -EINVAL;
1788 }
1789
1e90474c
PM
1790 if (tb[TCA_CBQ_RATE]) {
1791 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1da177e4
LT
1792 if (rtab == NULL)
1793 return -EINVAL;
1794 }
1795
1796 /* Change class parameters */
1797 sch_tree_lock(sch);
1798
1799 if (cl->next_alive != NULL)
1800 cbq_deactivate_class(cl);
1801
1802 if (rtab) {
1803 rtab = xchg(&cl->R_tab, rtab);
1804 qdisc_put_rtab(rtab);
1805 }
1806
1e90474c
PM
1807 if (tb[TCA_CBQ_LSSOPT])
1808 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1da177e4 1809
1e90474c 1810 if (tb[TCA_CBQ_WRROPT]) {
1da177e4 1811 cbq_rmprio(q, cl);
1e90474c 1812 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1da177e4
LT
1813 }
1814
1e90474c
PM
1815 if (tb[TCA_CBQ_OVL_STRATEGY])
1816 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1da177e4 1817
c3bc7cff 1818#ifdef CONFIG_NET_CLS_ACT
1e90474c
PM
1819 if (tb[TCA_CBQ_POLICE])
1820 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1da177e4
LT
1821#endif
1822
1e90474c
PM
1823 if (tb[TCA_CBQ_FOPT])
1824 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1da177e4
LT
1825
1826 if (cl->q->q.qlen)
1827 cbq_activate_class(cl);
1828
1829 sch_tree_unlock(sch);
1830
1e90474c 1831 if (tca[TCA_RATE])
1da177e4 1832 gen_replace_estimator(&cl->bstats, &cl->rate_est,
7698b4fc 1833 qdisc_root_lock(sch),
1e90474c 1834 tca[TCA_RATE]);
1da177e4
LT
1835 return 0;
1836 }
1837
1838 if (parentid == TC_H_ROOT)
1839 return -EINVAL;
1840
1e90474c
PM
1841 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1842 tb[TCA_CBQ_LSSOPT] == NULL)
1da177e4
LT
1843 return -EINVAL;
1844
1e90474c 1845 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1da177e4
LT
1846 if (rtab == NULL)
1847 return -EINVAL;
1848
1849 if (classid) {
1850 err = -EINVAL;
1851 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1852 goto failure;
1853 } else {
1854 int i;
1855 classid = TC_H_MAKE(sch->handle,0x8000);
1856
1857 for (i=0; i<0x8000; i++) {
1858 if (++q->hgenerator >= 0x8000)
1859 q->hgenerator = 1;
1860 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1861 break;
1862 }
1863 err = -ENOSR;
1864 if (i >= 0x8000)
1865 goto failure;
1866 classid = classid|q->hgenerator;
1867 }
1868
1869 parent = &q->link;
1870 if (parentid) {
1871 parent = cbq_class_lookup(q, parentid);
1872 err = -EINVAL;
1873 if (parent == NULL)
1874 goto failure;
1875 }
1876
1877 err = -ENOBUFS;
0da974f4 1878 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1da177e4
LT
1879 if (cl == NULL)
1880 goto failure;
1da177e4
LT
1881 cl->R_tab = rtab;
1882 rtab = NULL;
1883 cl->refcnt = 1;
5ce2d488 1884 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
bb949fbd 1885 &pfifo_qdisc_ops, classid)))
1da177e4 1886 cl->q = &noop_qdisc;
d77fea2e 1887 cl->common.classid = classid;
1da177e4
LT
1888 cl->tparent = parent;
1889 cl->qdisc = sch;
1890 cl->allot = parent->allot;
1891 cl->quantum = cl->allot;
1892 cl->weight = cl->R_tab->rate.rate;
1da177e4
LT
1893
1894 sch_tree_lock(sch);
1895 cbq_link_class(cl);
1896 cl->borrow = cl->tparent;
1897 if (cl->tparent != &q->link)
1898 cl->share = cl->tparent;
1899 cbq_adjust_levels(parent);
1900 cl->minidle = -0x7FFFFFFF;
1e90474c
PM
1901 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1902 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1da177e4
LT
1903 if (cl->ewma_log==0)
1904 cl->ewma_log = q->link.ewma_log;
1905 if (cl->maxidle==0)
1906 cl->maxidle = q->link.maxidle;
1907 if (cl->avpkt==0)
1908 cl->avpkt = q->link.avpkt;
1909 cl->overlimit = cbq_ovl_classic;
1e90474c
PM
1910 if (tb[TCA_CBQ_OVL_STRATEGY])
1911 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
c3bc7cff 1912#ifdef CONFIG_NET_CLS_ACT
1e90474c
PM
1913 if (tb[TCA_CBQ_POLICE])
1914 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1da177e4 1915#endif
1e90474c
PM
1916 if (tb[TCA_CBQ_FOPT])
1917 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1da177e4
LT
1918 sch_tree_unlock(sch);
1919
d77fea2e
PM
1920 qdisc_class_hash_grow(sch, &q->clhash);
1921
1e90474c 1922 if (tca[TCA_RATE])
1da177e4 1923 gen_new_estimator(&cl->bstats, &cl->rate_est,
7698b4fc 1924 qdisc_root_lock(sch), tca[TCA_RATE]);
1da177e4
LT
1925
1926 *arg = (unsigned long)cl;
1927 return 0;
1928
1929failure:
1930 qdisc_put_rtab(rtab);
1931 return err;
1932}
1933
1934static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1935{
1936 struct cbq_sched_data *q = qdisc_priv(sch);
1937 struct cbq_class *cl = (struct cbq_class*)arg;
a37ef2e3 1938 unsigned int qlen;
1da177e4
LT
1939
1940 if (cl->filters || cl->children || cl == &q->link)
1941 return -EBUSY;
1942
1943 sch_tree_lock(sch);
1944
a37ef2e3
JP
1945 qlen = cl->q->q.qlen;
1946 qdisc_reset(cl->q);
1947 qdisc_tree_decrease_qlen(cl->q, qlen);
1948
1da177e4
LT
1949 if (cl->next_alive)
1950 cbq_deactivate_class(cl);
1951
1952 if (q->tx_borrowed == cl)
1953 q->tx_borrowed = q->tx_class;
1954 if (q->tx_class == cl) {
1955 q->tx_class = NULL;
1956 q->tx_borrowed = NULL;
1957 }
c3bc7cff 1958#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1959 if (q->rx_class == cl)
1960 q->rx_class = NULL;
1961#endif
1962
1963 cbq_unlink_class(cl);
1964 cbq_adjust_levels(cl->tparent);
1965 cl->defmap = 0;
1966 cbq_sync_defmap(cl);
1967
1968 cbq_rmprio(q, cl);
1969 sch_tree_unlock(sch);
1970
1971 if (--cl->refcnt == 0)
1972 cbq_destroy_class(sch, cl);
1973
1974 return 0;
1975}
1976
1977static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1978{
1979 struct cbq_sched_data *q = qdisc_priv(sch);
1980 struct cbq_class *cl = (struct cbq_class *)arg;
1981
1982 if (cl == NULL)
1983 cl = &q->link;
1984
1985 return &cl->filter_list;
1986}
1987
1988static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1989 u32 classid)
1990{
1991 struct cbq_sched_data *q = qdisc_priv(sch);
1992 struct cbq_class *p = (struct cbq_class*)parent;
1993 struct cbq_class *cl = cbq_class_lookup(q, classid);
1994
1995 if (cl) {
1996 if (p && p->level <= cl->level)
1997 return 0;
1998 cl->filters++;
1999 return (unsigned long)cl;
2000 }
2001 return 0;
2002}
2003
2004static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2005{
2006 struct cbq_class *cl = (struct cbq_class*)arg;
2007
2008 cl->filters--;
2009}
2010
2011static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2012{
2013 struct cbq_sched_data *q = qdisc_priv(sch);
d77fea2e
PM
2014 struct cbq_class *cl;
2015 struct hlist_node *n;
1da177e4
LT
2016 unsigned h;
2017
2018 if (arg->stop)
2019 return;
2020
d77fea2e
PM
2021 for (h = 0; h < q->clhash.hashsize; h++) {
2022 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1da177e4
LT
2023 if (arg->count < arg->skip) {
2024 arg->count++;
2025 continue;
2026 }
2027 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2028 arg->stop = 1;
2029 return;
2030 }
2031 arg->count++;
2032 }
2033 }
2034}
2035
20fea08b 2036static const struct Qdisc_class_ops cbq_class_ops = {
1da177e4
LT
2037 .graft = cbq_graft,
2038 .leaf = cbq_leaf,
a37ef2e3 2039 .qlen_notify = cbq_qlen_notify,
1da177e4
LT
2040 .get = cbq_get,
2041 .put = cbq_put,
2042 .change = cbq_change_class,
2043 .delete = cbq_delete,
2044 .walk = cbq_walk,
2045 .tcf_chain = cbq_find_tcf,
2046 .bind_tcf = cbq_bind_filter,
2047 .unbind_tcf = cbq_unbind_filter,
2048 .dump = cbq_dump_class,
2049 .dump_stats = cbq_dump_class_stats,
2050};
2051
20fea08b 2052static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
1da177e4
LT
2053 .next = NULL,
2054 .cl_ops = &cbq_class_ops,
2055 .id = "cbq",
2056 .priv_size = sizeof(struct cbq_sched_data),
2057 .enqueue = cbq_enqueue,
2058 .dequeue = cbq_dequeue,
2059 .requeue = cbq_requeue,
2060 .drop = cbq_drop,
2061 .init = cbq_init,
2062 .reset = cbq_reset,
2063 .destroy = cbq_destroy,
2064 .change = NULL,
2065 .dump = cbq_dump,
2066 .dump_stats = cbq_dump_stats,
2067 .owner = THIS_MODULE,
2068};
2069
2070static int __init cbq_module_init(void)
2071{
2072 return register_qdisc(&cbq_qdisc_ops);
2073}
10297b99 2074static void __exit cbq_module_exit(void)
1da177e4
LT
2075{
2076 unregister_qdisc(&cbq_qdisc_ops);
2077}
2078module_init(cbq_module_init)
2079module_exit(cbq_module_exit)
2080MODULE_LICENSE("GPL");