/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	Algorithm skeleton was taken from NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	he should take into account that ONLY the skeleton was borrowed,
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real time classes
	have small rates. Note, that the statement of [3] is
	incomplete, delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.


	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me, why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. Workaround is to consider the next dequeue event
	as sign that previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with clock integrator, this scheme looks
	very close to an ideal solution.  */

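/* Illustrative only (not part of the original sources): a minimal sketch of
 * how a CBQ link-sharing hierarchy of the kind described above is typically
 * built from user space with the tc(8) utility. Device name, handles, rates
 * and the 10Mbit link bandwidth are made-up example values.
 *
 *	# root qdisc: the "link" class, 10Mbit physical bandwidth
 *	tc qdisc add dev eth0 root handle 1: cbq bandwidth 10Mbit avpkt 1000
 *
 *	# a bounded leaf class limited to 1Mbit (may not borrow)
 *	tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 10Mbit \
 *		rate 1Mbit allot 1514 prio 5 bounded avpkt 1000
 *
 *	# an unbounded leaf class that may borrow spare link bandwidth
 *	tc class add dev eth0 parent 1: classid 1:2 cbq bandwidth 10Mbit \
 *		rate 2Mbit allot 1514 prio 4 avpkt 1000
 */
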
struct cbq_sched_data;


struct cbq_class {
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est64 rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto __rcu	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
};

struct cbq_sched_data {
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO + 1];
	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];

	struct cbq_class	link;

	unsigned int		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	unsigned int		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};
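
/* Hierarchy pointers at a glance (illustrative sketch, not from the original
 * sources):
 *
 *	link (root, level 2)
 *	 |- agency A (level 1)		tparent = link, sibling ring with B
 *	 |	`- leaf A1 (level 0)	borrow/share = A unless bounded/isolated
 *	 `- agency B (level 1)
 *
 * tparent/children/sibling describe the configured class tree, while borrow
 * and share are the link-sharing shortcuts derived from the bounded and
 * isolated flags (see cbq_set_lss()).  "agency A/B" are made-up names.
 */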


#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)

static inline struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl;

	for (cl = this->tparent; cl; cl = cl->tparent) {
		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];

		if (new != NULL && new != this)
			return new;
	}
	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
 * it allows us to combine link sharing and priority scheduling
 * transparently.
 *
 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 * so that it resolves to split nodes. Then packets are classified
 * by logical priority, or a more specific classifier may be attached
 * to the split node.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_proto *fl;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		fl = rcu_dereference_bh(head->filter_list);
		/*
		 * Step 2+n. Apply classifier.
		 */
		result = tc_classify(skb, fl, &res, true);
		if (!fl || result < 0)
			goto fallback;

		cl = (void *)res.class;
		if (!cl) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL)
				goto fallback;
		}
		if (cl->level >= head->level)
			goto fallback;
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 * apply agency specific classifier.
		 * Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
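
/* Worked example (illustrative, not from the original sources): suppose the
 * root holds a route/fw filter that maps traffic to a split class 1:10,
 * whose children occupy its defaults[] slots.  A packet first hits the root
 * filter (step 2) and lands on 1:10; since 1:10 is not a leaf, the loop
 * repeats with head = 1:10 (step 3+n).  If a filter on the split node only
 * returns a logical priority (no class), that minor number indexes
 * head->defaults[] to pick the child; if nothing matches at all, step 4
 * falls back to the split node's best-effort slot.  The classid 1:10 is a
 * made-up example value.
 */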

/*
 * A packet has just been enqueued on the empty class.
 * cbq_activate_class adds it to the tail of active class list
 * of its priority band.
 */

static inline void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

/*
 * Unlink class from active chain.
 * Note that this same procedure is done directly in cbq_dequeue*
 * during round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
		psched_time_t now = psched_get_time();

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		qdisc_qstats_drop(sch);
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}

/* Overlimit action: penalize leaf class by adding offtime */
static void cbq_overlimit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		 * Class goes to sleep, so that it will have no
		 * chance to work avgidle. Let's forgive it 8)
		 *
		 * BTW cbq-2.0 has a crap in this
		 * place, apparently they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	 * real available rate, rather than leaf rate,
	 * which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
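
/* Numeric sketch (illustrative values, not from the original sources): with
 * ewma_log = 5 (so W = 2^-5), avgidle = -64, offtime = 1000 and
 * cl->undertime - q->now = 200 ticks, a newly penalized class gets
 *
 *	delay = 200 + 1000 - (64 - (64 >> 5)) = 1200 - 62 = 1138
 *
 * i.e. the offtime penalty reduced by the (1 - W)-scaled negative avgidle,
 * and cl->undertime is pushed 1138 ticks into the future before the class
 * may send again without borrowing.
 */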

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned int pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
	}

	qdisc_unthrottled(sch);
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			qdisc_qstats_drop(sch);
		return 0;
	}

	qdisc_qstats_drop(sch);
	return -1;
}
#endif

/*
 * It is mission critical procedure.
 *
 * We "regenerate" toplevel cutoff, if transmitting class
 * has backlog and it is not regulated. It is not part of
 * original CBQ description, but looks more reasonable.
 * Probably, it is wrong. This question needs further investigation.
 */

static inline void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;
	psched_time_t now;

	q->tx_class = NULL;
	/* Time integrator. We calculate EOS time
	 * by adding expected packet transmission time.
	 */
	now = q->now + L2T(&q->link, len);

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		 * (now - last) is total time between packet right edges.
		 * (last_pktlen/rate) is "virtual" busy time, so that
		 *
		 *	idle = (now - last) - last_pktlen/rate
		 */

		idle = now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
		 * cl->avgidle == true_avgidle/W,
		 * hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			 * will be allowed to send.
			 * It will occur, when:
			 * (1-W)*true_avgidle + W*delay = 0, i.e.
			 * idle = (1/W - 1)*(-true_avgidle)
			 * or
			 * idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			 * That is not all.
			 * To maintain the rate allocated to the class,
			 * we add to undertime virtual clock,
			 * necessary to complete transmitted packet.
			 * (len/phys_bandwidth has been already passed
			 * to the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		if ((s64)(now - cl->last) > 0)
			cl->last = now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
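
/* Numeric sketch of the scaled EWMA above (illustrative values only): with
 * ewma_log = 5 (W = 1/32), a stored cl->avgidle of 320 corresponds to a
 * "true" average idle of 320 * W = 10 ticks.  If the measured idle for this
 * packet is 64 ticks, the update gives
 *
 *	avgidle = 320 + 64 - (320 >> 5) = 374
 *
 * i.e. a true average of 374/32 ~= 11.7 ticks, which is exactly
 * (1 - W)*10 + W*64; the scaled form avoids any division in the fast path
 * while computing the same fixed-point EWMA.
 */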

static inline struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* It is very suspicious place. Now overlimit
		 * action is generated for not bounded classes
		 * only if link is completely congested.
		 * Though it is in agreement with the ancestor-only paradigm,
		 * it looks very stupid. Particularly,
		 * it means that this chunk of code will either
		 * never be called or result in strong amplification
		 * of burstiness. Dangerous, silly, and, however,
		 * no other solution exists.
		 */
		cl = cl->borrow;
		if (!cl) {
			this_cl->qstats.overlimits++;
			cbq_overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

static inline struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				 * this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			 * It could occur even if cl->q->q.qlen != 0
			 * f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				 * Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

static inline struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned int activemask;

	activemask = q->activemask & 0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
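
/* WRR sketch (illustrative numbers, not from the original sources): suppose
 * one priority band holds classes A and B with quantum 1500 and 3000 bytes.
 * Each class spends its deficit on the packets it dequeues; once
 * cl->deficit drops to zero or below, the class is skipped for the rest of
 * the round and refilled by +quantum on the next one, so over many rounds A
 * and B share the band roughly 1:2 by bytes.  The quanta here are made-up
 * example values, not defaults.
 */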

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;

	now = psched_get_time();

	if (q->tx_class)
		cbq_update(q);

	q->now = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			qdisc_bstats_update(sch, skb);
			sch->q.qlen--;
			qdisc_unthrottled(sch);
			return skb;
		}

		/* All the classes are overlimit.
		 *
		 * It is possible, if:
		 *
		 * 1. Scheduler is empty.
		 * 2. Toplevel cutoff inhibited borrowing.
		 * 3. Root class is overlimit.
		 *
		 * Reset the 2nd and 3rd conditions and retry.
		 *
		 * Note, that NS and cbq-2.0 are buggy, peeking
		 * an arbitrary class is appropriate for ancestor-only
		 * sharing, but not for toplevel algorithm.
		 *
		 * Our version is better, but slower, because it requires
		 * two passes, but it is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	 * Sigh... start watchdog timer in the last case.
	 */

	if (sch->q.qlen) {
		qdisc_qstats_overlimit(sch);
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}

/* CBQ class maintenance routines */
906
907static void cbq_adjust_levels(struct cbq_class *this)
908{
909 if (this == NULL)
910 return;
911
912 do {
913 int level = 0;
914 struct cbq_class *cl;
915
cc7ec456
ED
916 cl = this->children;
917 if (cl) {
1da177e4
LT
918 do {
919 if (cl->level > level)
920 level = cl->level;
921 } while ((cl = cl->sibling) != this->children);
922 }
cc7ec456 923 this->level = level + 1;
1da177e4
LT
924 } while ((this = this->tparent) != NULL);
925}
926
927static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
928{
929 struct cbq_class *cl;
d77fea2e 930 unsigned int h;
1da177e4
LT
931
932 if (q->quanta[prio] == 0)
933 return;
934
d77fea2e 935 for (h = 0; h < q->clhash.hashsize; h++) {
b67bfe0d 936 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			 * arithmetic overflows!
			 */
940 if (cl->priority == prio) {
941 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
942 q->quanta[prio];
943 }
833fa743
YY
944 if (cl->quantum <= 0 ||
945 cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
c17988a9
YY
946 pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
947 cl->common.classid, cl->quantum);
5ce2d488 948 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1da177e4
LT
949 }
950 }
951 }
952}
953
954static void cbq_sync_defmap(struct cbq_class *cl)
955{
956 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
957 struct cbq_class *split = cl->split;
cc7ec456 958 unsigned int h;
1da177e4
LT
959 int i;
960
961 if (split == NULL)
962 return;
963
cc7ec456
ED
964 for (i = 0; i <= TC_PRIO_MAX; i++) {
965 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1da177e4
LT
966 split->defaults[i] = NULL;
967 }
968
cc7ec456 969 for (i = 0; i <= TC_PRIO_MAX; i++) {
1da177e4
LT
970 int level = split->level;
971
972 if (split->defaults[i])
973 continue;
974
d77fea2e 975 for (h = 0; h < q->clhash.hashsize; h++) {
1da177e4
LT
976 struct cbq_class *c;
977
b67bfe0d 978 hlist_for_each_entry(c, &q->clhash.hash[h],
d77fea2e 979 common.hnode) {
1da177e4 980 if (c->split == split && c->level < level &&
cc7ec456 981 c->defmap & (1<<i)) {
1da177e4
LT
982 split->defaults[i] = c;
983 level = c->level;
984 }
985 }
986 }
987 }
988}
989
990static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
991{
992 struct cbq_class *split = NULL;
993
994 if (splitid == 0) {
cc7ec456
ED
995 split = cl->split;
996 if (!split)
1da177e4 997 return;
d77fea2e 998 splitid = split->common.classid;
1da177e4
LT
999 }
1000
d77fea2e 1001 if (split == NULL || split->common.classid != splitid) {
1da177e4 1002 for (split = cl->tparent; split; split = split->tparent)
d77fea2e 1003 if (split->common.classid == splitid)
1da177e4
LT
1004 break;
1005 }
1006
1007 if (split == NULL)
1008 return;
1009
1010 if (cl->split != split) {
1011 cl->defmap = 0;
1012 cbq_sync_defmap(cl);
1013 cl->split = split;
cc7ec456 1014 cl->defmap = def & mask;
1da177e4 1015 } else
cc7ec456 1016 cl->defmap = (cl->defmap & ~mask) | (def & mask);
1da177e4
LT
1017
1018 cbq_sync_defmap(cl);
1019}
1020
1021static void cbq_unlink_class(struct cbq_class *this)
1022{
1023 struct cbq_class *cl, **clp;
1024 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1025
d77fea2e 1026 qdisc_class_hash_remove(&q->clhash, &this->common);
1da177e4
LT
1027
1028 if (this->tparent) {
cc7ec456 1029 clp = &this->sibling;
1da177e4
LT
1030 cl = *clp;
1031 do {
1032 if (cl == this) {
1033 *clp = cl->sibling;
1034 break;
1035 }
1036 clp = &cl->sibling;
1037 } while ((cl = *clp) != this->sibling);
1038
1039 if (this->tparent->children == this) {
1040 this->tparent->children = this->sibling;
1041 if (this->sibling == this)
1042 this->tparent->children = NULL;
1043 }
1044 } else {
547b792c 1045 WARN_ON(this->sibling != this);
1da177e4
LT
1046 }
1047}
1048
1049static void cbq_link_class(struct cbq_class *this)
1050{
1051 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1da177e4
LT
1052 struct cbq_class *parent = this->tparent;
1053
1054 this->sibling = this;
d77fea2e 1055 qdisc_class_hash_insert(&q->clhash, &this->common);
1da177e4
LT
1056
1057 if (parent == NULL)
1058 return;
1059
1060 if (parent->children == NULL) {
1061 parent->children = this;
1062 } else {
1063 this->sibling = parent->children->sibling;
1064 parent->children->sibling = this;
1065 }
1066}
1067
cc7ec456 1068static unsigned int cbq_drop(struct Qdisc *sch)
1da177e4
LT
1069{
1070 struct cbq_sched_data *q = qdisc_priv(sch);
1071 struct cbq_class *cl, *cl_head;
1072 int prio;
1073 unsigned int len;
1074
1075 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
cc7ec456
ED
1076 cl_head = q->active[prio];
1077 if (!cl_head)
1da177e4
LT
1078 continue;
1079
1080 cl = cl_head;
1081 do {
1082 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1083 sch->q.qlen--;
a37ef2e3
JP
1084 if (!cl->q->q.qlen)
1085 cbq_deactivate_class(cl);
1da177e4
LT
1086 return len;
1087 }
1088 } while ((cl = cl->next_alive) != cl_head);
1089 }
1090 return 0;
1091}
1092
1093static void
cc7ec456 1094cbq_reset(struct Qdisc *sch)
1da177e4
LT
1095{
1096 struct cbq_sched_data *q = qdisc_priv(sch);
1097 struct cbq_class *cl;
1098 int prio;
cc7ec456 1099 unsigned int h;
1da177e4
LT
1100
1101 q->activemask = 0;
1102 q->pmask = 0;
1103 q->tx_class = NULL;
1104 q->tx_borrowed = NULL;
88a99354 1105 qdisc_watchdog_cancel(&q->watchdog);
2fbd3da3 1106 hrtimer_cancel(&q->delay_timer);
1da177e4 1107 q->toplevel = TC_CBQ_MAXLEVEL;
3bebcda2 1108 q->now = psched_get_time();
1da177e4
LT
1109
1110 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1111 q->active[prio] = NULL;
1112
d77fea2e 1113 for (h = 0; h < q->clhash.hashsize; h++) {
b67bfe0d 1114 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1da177e4
LT
1115 qdisc_reset(cl->q);
1116
1117 cl->next_alive = NULL;
a084980d 1118 cl->undertime = PSCHED_PASTPERFECT;
1da177e4
LT
1119 cl->avgidle = cl->maxidle;
1120 cl->deficit = cl->quantum;
1121 cl->cpriority = cl->priority;
1122 }
1123 }
1124 sch->q.qlen = 0;
1125}
1126
1127
1128static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1129{
cc7ec456
ED
1130 if (lss->change & TCF_CBQ_LSS_FLAGS) {
1131 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1132 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1da177e4 1133 }
cc7ec456 1134 if (lss->change & TCF_CBQ_LSS_EWMA)
1da177e4 1135 cl->ewma_log = lss->ewma_log;
cc7ec456 1136 if (lss->change & TCF_CBQ_LSS_AVPKT)
1da177e4 1137 cl->avpkt = lss->avpkt;
cc7ec456 1138 if (lss->change & TCF_CBQ_LSS_MINIDLE)
1da177e4 1139 cl->minidle = -(long)lss->minidle;
cc7ec456 1140 if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1da177e4
LT
1141 cl->maxidle = lss->maxidle;
1142 cl->avgidle = lss->maxidle;
1143 }
cc7ec456 1144 if (lss->change & TCF_CBQ_LSS_OFFTIME)
1da177e4
LT
1145 cl->offtime = lss->offtime;
1146 return 0;
1147}
1148
1149static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1150{
1151 q->nclasses[cl->priority]--;
1152 q->quanta[cl->priority] -= cl->weight;
1153 cbq_normalize_quanta(q, cl->priority);
1154}
1155
1156static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1157{
1158 q->nclasses[cl->priority]++;
1159 q->quanta[cl->priority] += cl->weight;
1160 cbq_normalize_quanta(q, cl->priority);
1161}
1162
1163static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1164{
1165 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1166
1167 if (wrr->allot)
1168 cl->allot = wrr->allot;
1169 if (wrr->weight)
1170 cl->weight = wrr->weight;
1171 if (wrr->priority) {
cc7ec456 1172 cl->priority = wrr->priority - 1;
1da177e4
LT
1173 cl->cpriority = cl->priority;
1174 if (cl->priority >= cl->priority2)
cc7ec456 1175 cl->priority2 = TC_CBQ_MAXPRIO - 1;
1da177e4
LT
1176 }
1177
1178 cbq_addprio(q, cl);
1179 return 0;
1180}
1181
c3bc7cff 1182#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1183static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1184{
1185 cl->police = p->police;
1186
1187 if (cl->q->handle) {
1188 if (p->police == TC_POLICE_RECLASSIFY)
1189 cl->q->reshape_fail = cbq_reshape_fail;
1190 else
1191 cl->q->reshape_fail = NULL;
1192 }
1193 return 0;
1194}
1195#endif
1196
1197static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1198{
1199 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1200 return 0;
1201}
1202
27a3421e
PM
1203static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1204 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1205 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1206 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1207 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1208 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1209 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1210 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1211};
1212
1e90474c 1213static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1da177e4
LT
1214{
1215 struct cbq_sched_data *q = qdisc_priv(sch);
1e90474c 1216 struct nlattr *tb[TCA_CBQ_MAX + 1];
1da177e4 1217 struct tc_ratespec *r;
cee63723
PM
1218 int err;
1219
27a3421e 1220 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
cee63723
PM
1221 if (err < 0)
1222 return err;
1da177e4 1223
27a3421e 1224 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1da177e4
LT
1225 return -EINVAL;
1226
1e90474c 1227 r = nla_data(tb[TCA_CBQ_RATE]);
1da177e4 1228
1e90474c 1229 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1da177e4
LT
1230 return -EINVAL;
1231
d77fea2e
PM
1232 err = qdisc_class_hash_init(&q->clhash);
1233 if (err < 0)
1234 goto put_rtab;
1235
1da177e4
LT
1236 q->link.refcnt = 1;
1237 q->link.sibling = &q->link;
d77fea2e 1238 q->link.common.classid = sch->handle;
1da177e4 1239 q->link.qdisc = sch;
3511c913
CG
1240 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1241 sch->handle);
1242 if (!q->link.q)
1da177e4
LT
1243 q->link.q = &noop_qdisc;
1244
cc7ec456
ED
1245 q->link.priority = TC_CBQ_MAXPRIO - 1;
1246 q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1247 q->link.cpriority = TC_CBQ_MAXPRIO - 1;
5ce2d488 1248 q->link.allot = psched_mtu(qdisc_dev(sch));
1da177e4
LT
1249 q->link.quantum = q->link.allot;
1250 q->link.weight = q->link.R_tab->rate.rate;
1251
1252 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1253 q->link.avpkt = q->link.allot/2;
1254 q->link.minidle = -0x7FFFFFFF;
1da177e4 1255
88a99354 1256 qdisc_watchdog_init(&q->watchdog, sch);
4a8e320c 1257 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
1da177e4
LT
1258 q->delay_timer.function = cbq_undelay;
1259 q->toplevel = TC_CBQ_MAXLEVEL;
3bebcda2 1260 q->now = psched_get_time();
1da177e4
LT
1261
1262 cbq_link_class(&q->link);
1263
1e90474c
PM
1264 if (tb[TCA_CBQ_LSSOPT])
1265 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1da177e4
LT
1266
1267 cbq_addprio(q, &q->link);
1268 return 0;
d77fea2e
PM
1269
1270put_rtab:
1271 qdisc_put_rtab(q->link.R_tab);
1272 return err;
1da177e4
LT
1273}
1274
cc7ec456 1275static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1276{
27a884dc 1277 unsigned char *b = skb_tail_pointer(skb);
1da177e4 1278
1b34ec43
DM
1279 if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
1280 goto nla_put_failure;
1da177e4
LT
1281 return skb->len;
1282
1e90474c 1283nla_put_failure:
dc5fc579 1284 nlmsg_trim(skb, b);
1da177e4
LT
1285 return -1;
1286}
1287
cc7ec456 1288static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1289{
27a884dc 1290 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1291 struct tc_cbq_lssopt opt;
1292
1293 opt.flags = 0;
1294 if (cl->borrow == NULL)
1295 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1296 if (cl->share == NULL)
1297 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1298 opt.ewma_log = cl->ewma_log;
1299 opt.level = cl->level;
1300 opt.avpkt = cl->avpkt;
1301 opt.maxidle = cl->maxidle;
1302 opt.minidle = (u32)(-cl->minidle);
1303 opt.offtime = cl->offtime;
1304 opt.change = ~0;
1b34ec43
DM
1305 if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
1306 goto nla_put_failure;
1da177e4
LT
1307 return skb->len;
1308
1e90474c 1309nla_put_failure:
dc5fc579 1310 nlmsg_trim(skb, b);
1da177e4
LT
1311 return -1;
1312}
1313
cc7ec456 1314static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1315{
27a884dc 1316 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1317 struct tc_cbq_wrropt opt;
1318
a0db856a 1319 memset(&opt, 0, sizeof(opt));
1da177e4
LT
1320 opt.flags = 0;
1321 opt.allot = cl->allot;
cc7ec456
ED
1322 opt.priority = cl->priority + 1;
1323 opt.cpriority = cl->cpriority + 1;
1da177e4 1324 opt.weight = cl->weight;
1b34ec43
DM
1325 if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
1326 goto nla_put_failure;
1da177e4
LT
1327 return skb->len;
1328
1e90474c 1329nla_put_failure:
dc5fc579 1330 nlmsg_trim(skb, b);
1da177e4
LT
1331 return -1;
1332}
1333
cc7ec456 1334static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1335{
27a884dc 1336 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1337 struct tc_cbq_fopt opt;
1338
1339 if (cl->split || cl->defmap) {
d77fea2e 1340 opt.split = cl->split ? cl->split->common.classid : 0;
1da177e4
LT
1341 opt.defmap = cl->defmap;
1342 opt.defchange = ~0;
1b34ec43
DM
1343 if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
1344 goto nla_put_failure;
1da177e4
LT
1345 }
1346 return skb->len;
1347
1e90474c 1348nla_put_failure:
dc5fc579 1349 nlmsg_trim(skb, b);
1da177e4
LT
1350 return -1;
1351}
1352
c3bc7cff 1353#ifdef CONFIG_NET_CLS_ACT
cc7ec456 1354static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1355{
27a884dc 1356 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1357 struct tc_cbq_police opt;
1358
1359 if (cl->police) {
1360 opt.police = cl->police;
9ef1d4c7
PM
1361 opt.__res1 = 0;
1362 opt.__res2 = 0;
1b34ec43
DM
1363 if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
1364 goto nla_put_failure;
1da177e4
LT
1365 }
1366 return skb->len;
1367
1e90474c 1368nla_put_failure:
dc5fc579 1369 nlmsg_trim(skb, b);
1da177e4
LT
1370 return -1;
1371}
1372#endif
1373
1374static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1375{
1376 if (cbq_dump_lss(skb, cl) < 0 ||
1377 cbq_dump_rate(skb, cl) < 0 ||
1378 cbq_dump_wrr(skb, cl) < 0 ||
c3bc7cff 1379#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1380 cbq_dump_police(skb, cl) < 0 ||
1381#endif
1382 cbq_dump_fopt(skb, cl) < 0)
1383 return -1;
1384 return 0;
1385}
1386
1387static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1388{
1389 struct cbq_sched_data *q = qdisc_priv(sch);
4b3550ef 1390 struct nlattr *nest;
1da177e4 1391
4b3550ef
PM
1392 nest = nla_nest_start(skb, TCA_OPTIONS);
1393 if (nest == NULL)
1394 goto nla_put_failure;
1da177e4 1395 if (cbq_dump_attr(skb, &q->link) < 0)
1e90474c 1396 goto nla_put_failure;
d59b7d80 1397 return nla_nest_end(skb, nest);
1da177e4 1398
1e90474c 1399nla_put_failure:
4b3550ef 1400 nla_nest_cancel(skb, nest);
1da177e4
LT
1401 return -1;
1402}
1403
1404static int
1405cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1406{
1407 struct cbq_sched_data *q = qdisc_priv(sch);
1408
1409 q->link.xstats.avgidle = q->link.avgidle;
1410 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1411}
1412
1413static int
1414cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1415 struct sk_buff *skb, struct tcmsg *tcm)
1416{
cc7ec456 1417 struct cbq_class *cl = (struct cbq_class *)arg;
4b3550ef 1418 struct nlattr *nest;
1da177e4
LT
1419
1420 if (cl->tparent)
d77fea2e 1421 tcm->tcm_parent = cl->tparent->common.classid;
1da177e4
LT
1422 else
1423 tcm->tcm_parent = TC_H_ROOT;
d77fea2e 1424 tcm->tcm_handle = cl->common.classid;
1da177e4
LT
1425 tcm->tcm_info = cl->q->handle;
1426
4b3550ef
PM
1427 nest = nla_nest_start(skb, TCA_OPTIONS);
1428 if (nest == NULL)
1429 goto nla_put_failure;
1da177e4 1430 if (cbq_dump_attr(skb, cl) < 0)
1e90474c 1431 goto nla_put_failure;
d59b7d80 1432 return nla_nest_end(skb, nest);
1da177e4 1433
1e90474c 1434nla_put_failure:
4b3550ef 1435 nla_nest_cancel(skb, nest);
1da177e4
LT
1436 return -1;
1437}
1438
1439static int
1440cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1441 struct gnet_dump *d)
1442{
1443 struct cbq_sched_data *q = qdisc_priv(sch);
cc7ec456 1444 struct cbq_class *cl = (struct cbq_class *)arg;
1da177e4 1445
1da177e4
LT
1446 cl->xstats.avgidle = cl->avgidle;
1447 cl->xstats.undertime = 0;
1448
a084980d 1449 if (cl->undertime != PSCHED_PASTPERFECT)
8edc0c31 1450 cl->xstats.undertime = cl->undertime - q->now;
1da177e4 1451
edb09eb1
ED
1452 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1453 d, NULL, &cl->bstats) < 0 ||
d250a5f9 1454 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
b0ab6f92 1455 gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
1da177e4
LT
1456 return -1;
1457
1458 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1459}
1460
1461static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1462 struct Qdisc **old)
1463{
cc7ec456 1464 struct cbq_class *cl = (struct cbq_class *)arg;
1da177e4 1465
5b9a9ccf 1466 if (new == NULL) {
3511c913 1467 new = qdisc_create_dflt(sch->dev_queue,
5b9a9ccf
PM
1468 &pfifo_qdisc_ops, cl->common.classid);
1469 if (new == NULL)
1470 return -ENOBUFS;
1471 } else {
c3bc7cff 1472#ifdef CONFIG_NET_CLS_ACT
5b9a9ccf
PM
1473 if (cl->police == TC_POLICE_RECLASSIFY)
1474 new->reshape_fail = cbq_reshape_fail;
1da177e4 1475#endif
1da177e4 1476 }
5b9a9ccf 1477
86a7996c 1478 *old = qdisc_replace(sch, new, &cl->q);
5b9a9ccf 1479 return 0;
1da177e4
LT
1480}
1481
cc7ec456 1482static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1da177e4 1483{
cc7ec456 1484 struct cbq_class *cl = (struct cbq_class *)arg;
1da177e4 1485
5b9a9ccf 1486 return cl->q;
1da177e4
LT
1487}
1488
a37ef2e3
JP
1489static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1490{
1491 struct cbq_class *cl = (struct cbq_class *)arg;
1492
1493 if (cl->q->q.qlen == 0)
1494 cbq_deactivate_class(cl);
1495}
1496
1da177e4
LT
1497static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1498{
1499 struct cbq_sched_data *q = qdisc_priv(sch);
1500 struct cbq_class *cl = cbq_class_lookup(q, classid);
1501
1502 if (cl) {
1503 cl->refcnt++;
1504 return (unsigned long)cl;
1505 }
1506 return 0;
1507}
1508
1da177e4
LT
1509static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1510{
1511 struct cbq_sched_data *q = qdisc_priv(sch);
1512
547b792c 1513 WARN_ON(cl->filters);
1da177e4 1514
ff31ab56 1515 tcf_destroy_chain(&cl->filter_list);
1da177e4
LT
1516 qdisc_destroy(cl->q);
1517 qdisc_put_rtab(cl->R_tab);
1da177e4 1518 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1da177e4
LT
1519 if (cl != &q->link)
1520 kfree(cl);
1521}
1522
cc7ec456 1523static void cbq_destroy(struct Qdisc *sch)
1da177e4
LT
1524{
1525 struct cbq_sched_data *q = qdisc_priv(sch);
b67bfe0d 1526 struct hlist_node *next;
1da177e4 1527 struct cbq_class *cl;
cc7ec456 1528 unsigned int h;
1da177e4 1529
c3bc7cff 1530#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1531 q->rx_class = NULL;
1532#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leaves, which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
d77fea2e 1538 for (h = 0; h < q->clhash.hashsize; h++) {
b67bfe0d 1539 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
ff31ab56 1540 tcf_destroy_chain(&cl->filter_list);
b00b4bf9 1541 }
d77fea2e 1542 for (h = 0; h < q->clhash.hashsize; h++) {
b67bfe0d 1543 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
d77fea2e 1544 common.hnode)
1da177e4 1545 cbq_destroy_class(sch, cl);
1da177e4 1546 }
d77fea2e 1547 qdisc_class_hash_destroy(&q->clhash);
1da177e4
LT
1548}
1549
1550static void cbq_put(struct Qdisc *sch, unsigned long arg)
1551{
cc7ec456 1552 struct cbq_class *cl = (struct cbq_class *)arg;
1da177e4
LT
1553
1554 if (--cl->refcnt == 0) {
c3bc7cff 1555#ifdef CONFIG_NET_CLS_ACT
102396ae 1556 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1da177e4
LT
1557 struct cbq_sched_data *q = qdisc_priv(sch);
1558
7698b4fc 1559 spin_lock_bh(root_lock);
1da177e4
LT
1560 if (q->rx_class == cl)
1561 q->rx_class = NULL;
7698b4fc 1562 spin_unlock_bh(root_lock);
1da177e4
LT
1563#endif
1564
1565 cbq_destroy_class(sch, cl);
1566 }
1567}
1568
1569static int
1e90474c 1570cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1da177e4
LT
1571 unsigned long *arg)
1572{
1573 int err;
1574 struct cbq_sched_data *q = qdisc_priv(sch);
cc7ec456 1575 struct cbq_class *cl = (struct cbq_class *)*arg;
1e90474c
PM
1576 struct nlattr *opt = tca[TCA_OPTIONS];
1577 struct nlattr *tb[TCA_CBQ_MAX + 1];
1da177e4
LT
1578 struct cbq_class *parent;
1579 struct qdisc_rate_table *rtab = NULL;
1580
cee63723 1581 if (opt == NULL)
1da177e4
LT
1582 return -EINVAL;
1583
27a3421e 1584 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
cee63723
PM
1585 if (err < 0)
1586 return err;
1587
c3498d34
FW
1588 if (tb[TCA_CBQ_OVL_STRATEGY])
1589 return -EOPNOTSUPP;
1590
1da177e4
LT
1591 if (cl) {
1592 /* Check parent */
1593 if (parentid) {
d77fea2e
PM
1594 if (cl->tparent &&
1595 cl->tparent->common.classid != parentid)
1da177e4
LT
1596 return -EINVAL;
1597 if (!cl->tparent && parentid != TC_H_ROOT)
1598 return -EINVAL;
1599 }
1600
1e90474c 1601 if (tb[TCA_CBQ_RATE]) {
71bcb09a
SH
1602 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1603 tb[TCA_CBQ_RTAB]);
1da177e4
LT
1604 if (rtab == NULL)
1605 return -EINVAL;
1606 }
1607
71bcb09a 1608 if (tca[TCA_RATE]) {
22e0f8b9
JF
1609 err = gen_replace_estimator(&cl->bstats, NULL,
1610 &cl->rate_est,
edb09eb1
ED
1611 NULL,
1612 qdisc_root_sleeping_running(sch),
71bcb09a
SH
1613 tca[TCA_RATE]);
1614 if (err) {
79c11f2e 1615 qdisc_put_rtab(rtab);
71bcb09a
SH
1616 return err;
1617 }
1618 }
1619
1da177e4
LT
1620 /* Change class parameters */
1621 sch_tree_lock(sch);
1622
1623 if (cl->next_alive != NULL)
1624 cbq_deactivate_class(cl);
1625
1626 if (rtab) {
b94c8afc
PM
1627 qdisc_put_rtab(cl->R_tab);
1628 cl->R_tab = rtab;
1da177e4
LT
1629 }
1630
1e90474c
PM
1631 if (tb[TCA_CBQ_LSSOPT])
1632 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1da177e4 1633
1e90474c 1634 if (tb[TCA_CBQ_WRROPT]) {
1da177e4 1635 cbq_rmprio(q, cl);
1e90474c 1636 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1da177e4
LT
1637 }
1638
c3bc7cff 1639#ifdef CONFIG_NET_CLS_ACT
1e90474c
PM
1640 if (tb[TCA_CBQ_POLICE])
1641 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1da177e4
LT
1642#endif
1643
1e90474c
PM
1644 if (tb[TCA_CBQ_FOPT])
1645 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1da177e4
LT
1646
1647 if (cl->q->q.qlen)
1648 cbq_activate_class(cl);
1649
1650 sch_tree_unlock(sch);
1651
1da177e4
LT
1652 return 0;
1653 }
1654
1655 if (parentid == TC_H_ROOT)
1656 return -EINVAL;
1657
1e90474c
PM
1658 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1659 tb[TCA_CBQ_LSSOPT] == NULL)
1da177e4
LT
1660 return -EINVAL;
1661
1e90474c 1662 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1da177e4
LT
1663 if (rtab == NULL)
1664 return -EINVAL;
1665
1666 if (classid) {
1667 err = -EINVAL;
cc7ec456
ED
1668 if (TC_H_MAJ(classid ^ sch->handle) ||
1669 cbq_class_lookup(q, classid))
1da177e4
LT
1670 goto failure;
1671 } else {
1672 int i;
cc7ec456 1673 classid = TC_H_MAKE(sch->handle, 0x8000);
1da177e4 1674
cc7ec456 1675 for (i = 0; i < 0x8000; i++) {
1da177e4
LT
1676 if (++q->hgenerator >= 0x8000)
1677 q->hgenerator = 1;
1678 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1679 break;
1680 }
1681 err = -ENOSR;
1682 if (i >= 0x8000)
1683 goto failure;
1684 classid = classid|q->hgenerator;
1685 }
1686
1687 parent = &q->link;
1688 if (parentid) {
1689 parent = cbq_class_lookup(q, parentid);
1690 err = -EINVAL;
1691 if (parent == NULL)
1692 goto failure;
1693 }
1694
1695 err = -ENOBUFS;
0da974f4 1696 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1da177e4
LT
1697 if (cl == NULL)
1698 goto failure;
71bcb09a
SH
1699
1700 if (tca[TCA_RATE]) {
22e0f8b9 1701 err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
edb09eb1
ED
1702 NULL,
1703 qdisc_root_sleeping_running(sch),
71bcb09a
SH
1704 tca[TCA_RATE]);
1705 if (err) {
1706 kfree(cl);
1707 goto failure;
1708 }
1709 }
1710
1da177e4
LT
1711 cl->R_tab = rtab;
1712 rtab = NULL;
1713 cl->refcnt = 1;
3511c913
CG
1714 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1715 if (!cl->q)
1da177e4 1716 cl->q = &noop_qdisc;
d77fea2e 1717 cl->common.classid = classid;
1da177e4
LT
1718 cl->tparent = parent;
1719 cl->qdisc = sch;
1720 cl->allot = parent->allot;
1721 cl->quantum = cl->allot;
1722 cl->weight = cl->R_tab->rate.rate;
1da177e4
LT
1723
1724 sch_tree_lock(sch);
1725 cbq_link_class(cl);
1726 cl->borrow = cl->tparent;
1727 if (cl->tparent != &q->link)
1728 cl->share = cl->tparent;
1729 cbq_adjust_levels(parent);
1730 cl->minidle = -0x7FFFFFFF;
1e90474c
PM
1731 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1732 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
cc7ec456 1733 if (cl->ewma_log == 0)
1da177e4 1734 cl->ewma_log = q->link.ewma_log;
cc7ec456 1735 if (cl->maxidle == 0)
1da177e4 1736 cl->maxidle = q->link.maxidle;
cc7ec456 1737 if (cl->avpkt == 0)
1da177e4 1738 cl->avpkt = q->link.avpkt;
c3bc7cff 1739#ifdef CONFIG_NET_CLS_ACT
1e90474c
PM
1740 if (tb[TCA_CBQ_POLICE])
1741 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1da177e4 1742#endif
1e90474c
PM
1743 if (tb[TCA_CBQ_FOPT])
1744 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1da177e4
LT
1745 sch_tree_unlock(sch);
1746
d77fea2e
PM
1747 qdisc_class_hash_grow(sch, &q->clhash);
1748
1da177e4
LT
1749 *arg = (unsigned long)cl;
1750 return 0;
1751
1752failure:
1753 qdisc_put_rtab(rtab);
1754 return err;
1755}
1756
1757static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1758{
1759 struct cbq_sched_data *q = qdisc_priv(sch);
cc7ec456 1760 struct cbq_class *cl = (struct cbq_class *)arg;
2ccccf5f 1761 unsigned int qlen, backlog;
1da177e4
LT
1762
1763 if (cl->filters || cl->children || cl == &q->link)
1764 return -EBUSY;
1765
1766 sch_tree_lock(sch);
1767
a37ef2e3 1768 qlen = cl->q->q.qlen;
2ccccf5f 1769 backlog = cl->q->qstats.backlog;
a37ef2e3 1770 qdisc_reset(cl->q);
2ccccf5f 1771 qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
a37ef2e3 1772
1da177e4
LT
1773 if (cl->next_alive)
1774 cbq_deactivate_class(cl);
1775
1776 if (q->tx_borrowed == cl)
1777 q->tx_borrowed = q->tx_class;
1778 if (q->tx_class == cl) {
1779 q->tx_class = NULL;
1780 q->tx_borrowed = NULL;
1781 }
c3bc7cff 1782#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1783 if (q->rx_class == cl)
1784 q->rx_class = NULL;
1785#endif
1786
1787 cbq_unlink_class(cl);
1788 cbq_adjust_levels(cl->tparent);
1789 cl->defmap = 0;
1790 cbq_sync_defmap(cl);
1791
1792 cbq_rmprio(q, cl);
1793 sch_tree_unlock(sch);
1794
7cd0a638
JP
1795 BUG_ON(--cl->refcnt == 0);
1796 /*
1797 * This shouldn't happen: we "hold" one cops->get() when called
1798 * from tc_ctl_tclass; the destroy method is done from cops->put().
1799 */
1da177e4
LT
1800
1801 return 0;
1802}
1803
25d8c0d5
JF
1804static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
1805 unsigned long arg)
1da177e4
LT
1806{
1807 struct cbq_sched_data *q = qdisc_priv(sch);
1808 struct cbq_class *cl = (struct cbq_class *)arg;
1809
1810 if (cl == NULL)
1811 cl = &q->link;
1812
1813 return &cl->filter_list;
1814}
1815
1816static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1817 u32 classid)
1818{
1819 struct cbq_sched_data *q = qdisc_priv(sch);
cc7ec456 1820 struct cbq_class *p = (struct cbq_class *)parent;
1da177e4
LT
1821 struct cbq_class *cl = cbq_class_lookup(q, classid);
1822
1823 if (cl) {
1824 if (p && p->level <= cl->level)
1825 return 0;
1826 cl->filters++;
1827 return (unsigned long)cl;
1828 }
1829 return 0;
1830}
1831
1832static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1833{
cc7ec456 1834 struct cbq_class *cl = (struct cbq_class *)arg;
1da177e4
LT
1835
1836 cl->filters--;
1837}
1838
1839static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1840{
1841 struct cbq_sched_data *q = qdisc_priv(sch);
d77fea2e 1842 struct cbq_class *cl;
cc7ec456 1843 unsigned int h;
1da177e4
LT
1844
1845 if (arg->stop)
1846 return;
1847
d77fea2e 1848 for (h = 0; h < q->clhash.hashsize; h++) {
b67bfe0d 1849 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1da177e4
LT
1850 if (arg->count < arg->skip) {
1851 arg->count++;
1852 continue;
1853 }
1854 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1855 arg->stop = 1;
1856 return;
1857 }
1858 arg->count++;
1859 }
1860 }
1861}
1862
20fea08b 1863static const struct Qdisc_class_ops cbq_class_ops = {
1da177e4
LT
1864 .graft = cbq_graft,
1865 .leaf = cbq_leaf,
a37ef2e3 1866 .qlen_notify = cbq_qlen_notify,
1da177e4
LT
1867 .get = cbq_get,
1868 .put = cbq_put,
1869 .change = cbq_change_class,
1870 .delete = cbq_delete,
1871 .walk = cbq_walk,
1872 .tcf_chain = cbq_find_tcf,
1873 .bind_tcf = cbq_bind_filter,
1874 .unbind_tcf = cbq_unbind_filter,
1875 .dump = cbq_dump_class,
1876 .dump_stats = cbq_dump_class_stats,
1877};
1878
20fea08b 1879static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
1da177e4
LT
1880 .next = NULL,
1881 .cl_ops = &cbq_class_ops,
1882 .id = "cbq",
1883 .priv_size = sizeof(struct cbq_sched_data),
1884 .enqueue = cbq_enqueue,
1885 .dequeue = cbq_dequeue,
77be155c 1886 .peek = qdisc_peek_dequeued,
1da177e4
LT
1887 .drop = cbq_drop,
1888 .init = cbq_init,
1889 .reset = cbq_reset,
1890 .destroy = cbq_destroy,
1891 .change = NULL,
1892 .dump = cbq_dump,
1893 .dump_stats = cbq_dump_stats,
1894 .owner = THIS_MODULE,
1895};
1896
1897static int __init cbq_module_init(void)
1898{
1899 return register_qdisc(&cbq_qdisc_ops);
1900}
10297b99 1901static void __exit cbq_module_exit(void)
1da177e4
LT
1902{
1903 unregister_qdisc(&cbq_qdisc_ops);
1904}
1905module_init(cbq_module_init)
1906module_exit(cbq_module_exit)
1907MODULE_LICENSE("GPL");