/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	Algorithm skeleton was taken from NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	he should take into account that ONLY the skeleton was borrowed,
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real time classes
	have small rates. Note, that the statement of [3] is
	incomplete, delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.


	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me, why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. Workaround is to consider the next dequeue event
	as sign that previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with clock integrator, this scheme looks
	very close to an ideal solution.  */

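/*	A worked instance of the bound above, with illustrative numbers only
	(not taken from [1]-[4]): take MTU = 1500 bytes, k = 2 real time
	classes with r_1 = r_2 = 0.25 (so r = 0.5) and W = 2000 bytes.
	The per-round allotment W*r_1 = 500 bytes is below the MTU,
	[MTU/(W*r_1)] = 3, and

	delay_1 <= (3*W*r + W*r + 2*MTU)/B = (3000 + 1000 + 3000)/B = 7000/B

	i.e. about 5.6 ms on a 10 Mbit/s link -- a finite delay bound even
	though the quantum is smaller than the MTU.  */
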
struct cbq_sched_data;


struct cbq_class {
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est64 rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
};

struct cbq_sched_data {
	struct Qdisc_class_hash	clhash;		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO + 1];
	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];

	struct cbq_class	link;

	unsigned int		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	unsigned int		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};

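
/* L2T ("length to time") converts a packet length into its transmission
 * time at this class's configured rate, in psched ticks, by looking it up
 * in the precomputed qdisc rate table.
 */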
#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)

static inline struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl;

	for (cl = this->tparent; cl; cl = cl->tparent) {
		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];

		if (new != NULL && new != this)
			return new;
	}
	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
 * it allows us to combine link sharing and priority scheduling
 * transparently.
 *
 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 * so that it resolves to split nodes. Then packets are classified
 * by logical priority, or a more specific classifier may be attached
 * to the split node.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 * Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		cl = (void *)res.class;
		if (!cl) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL)
				goto fallback;
		}
		if (cl->level >= head->level)
			goto fallback;
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 * apply agency specific classifier.
		 * Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}

/*
 * A packet has just been enqueued on the empty class.
 * cbq_activate_class adds it to the tail of active class list
 * of its priority band.
 */

static inline void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

/*
 * Unlink class from active chain.
 * Note that this same procedure is done directly in cbq_dequeue*
 * during round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

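/* Descriptive note (not in the original comments): on enqueue this applies
 * the top-level link-sharing rule. It walks the borrow chain of the class
 * that just received a packet and records the level of the first class that
 * is currently underlimit in q->toplevel; cbq_under_limit() then refuses to
 * borrow from ancestors whose level lies above this cutoff.
 */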
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
		psched_time_t now = psched_get_time();

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		 * Class goes to sleep, so that it will have no
		 * chance to work avgidle. Let's forgive it 8)
		 *
		 * BTW cbq-2.0 has a crap in this
		 * place, apparently they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	 * real available rate, rather than leaf rate,
	 * which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}

/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 * they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(
					hrtimer_get_expires(&q->delay_timer),
					expires)) > 0)
				hrtimer_set_expires(&q->delay_timer, expires);
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned int pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	qdisc_unthrottled(sch);
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif

/*
 * It is mission critical procedure.
 *
 * We "regenerate" toplevel cutoff, if transmitting class
 * has backlog and it is not regulated. It is not part of
 * original CBQ description, but looks more reasonable.
 * Probably, it is wrong. This question needs further investigation.
 */

static inline void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;
	psched_time_t now;

	q->tx_class = NULL;
	/* Time integrator. We calculate EOS time
	 * by adding expected packet transmission time.
	 */
	now = q->now + L2T(&q->link, len);

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		 * (now - last) is total time between packet right edges.
		 * (last_pktlen/rate) is "virtual" busy time, so that
		 *
		 * idle = (now - last) - last_pktlen/rate
		 */

		idle = now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

			/* true_avgidle := (1-W)*true_avgidle + W*idle,
			 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
			 * cl->avgidle == true_avgidle/W,
			 * hence:
			 */
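			/* With S == cl->avgidle == true_avgidle/W, the rule
			 * above expands to
			 *   S_new = ((1-W)*true_avgidle + W*idle)/W
			 *         = S_old + idle - W*S_old,
			 * and W*S_old is S_old >> ewma_log, giving the
			 * one-line form below.
			 */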
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			 * will be allowed to send.
			 * It will occur, when:
			 * (1-W)*true_avgidle + W*delay = 0, i.e.
			 * idle = (1/W - 1)*(-true_avgidle)
			 * or
			 * idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			 * That is not all.
			 * To maintain the rate allocated to the class,
			 * we add to undertime virtual clock,
			 * necessary to complete transmitted packet.
			 * (len/phys_bandwidth has been already passed
			 * to the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		if ((s64)(now - cl->last) > 0)
			cl->last = now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}

static inline struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* It is very suspicious place. Now overlimit
		 * action is generated for not bounded classes
		 * only if link is completely congested.
		 * Though it is in agree with ancestor-only paradigm,
		 * it looks very stupid. Particularly,
		 * it means that this chunk of code will either
		 * never be called or result in strong amplification
		 * of burstiness. Dangerous, silly, and, however,
		 * no another solution exists.
		 */
		cl = cl->borrow;
		if (!cl) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

static inline struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				 * this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			 * It could occur even if cl->q->q.qlen != 0
			 * f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				 * Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

static inline struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned int activemask;

	activemask = q->activemask & 0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;

	now = psched_get_time();

	if (q->tx_class)
		cbq_update(q);

	q->now = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			qdisc_bstats_update(sch, skb);
			sch->q.qlen--;
			qdisc_unthrottled(sch);
			return skb;
		}

		/* All the classes are overlimit.
		 *
		 * It is possible, if:
		 *
		 * 1. Scheduler is empty.
		 * 2. Toplevel cutoff inhibited borrowing.
		 * 3. Root class is overlimit.
		 *
		 * Reset 2d and 3d conditions and retry.
		 *
		 * Note, that NS and cbq-2.0 are buggy, peeking
		 * an arbitrary class is appropriate for ancestor-only
		 * sharing, but not for toplevel algorithm.
		 *
		 * Our version is better, but slower, because it requires
		 * two passes, but it is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	 * Sigh... start watchdog timer in the last case.
	 */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}

1004/* CBQ class maintanance routines */
1005
1006static void cbq_adjust_levels(struct cbq_class *this)
1007{
1008 if (this == NULL)
1009 return;
1010
1011 do {
1012 int level = 0;
1013 struct cbq_class *cl;
1014
cc7ec456
ED
1015 cl = this->children;
1016 if (cl) {
1da177e4
LT
1017 do {
1018 if (cl->level > level)
1019 level = cl->level;
1020 } while ((cl = cl->sibling) != this->children);
1021 }
cc7ec456 1022 this->level = level + 1;
1da177e4
LT
1023 } while ((this = this->tparent) != NULL);
1024}
1025
1026static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1027{
1028 struct cbq_class *cl;
d77fea2e 1029 unsigned int h;
1da177e4
LT
1030
1031 if (q->quanta[prio] == 0)
1032 return;
1033
d77fea2e 1034 for (h = 0; h < q->clhash.hashsize; h++) {
b67bfe0d 1035 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1da177e4 1036 /* BUGGGG... Beware! This expression suffer of
cc7ec456 1037 * arithmetic overflows!
1da177e4
LT
1038 */
1039 if (cl->priority == prio) {
1040 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1041 q->quanta[prio];
1042 }
833fa743
YY
1043 if (cl->quantum <= 0 ||
1044 cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
c17988a9
YY
1045 pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1046 cl->common.classid, cl->quantum);
5ce2d488 1047 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1da177e4
LT
1048 }
1049 }
1050 }
1051}
1052
1053static void cbq_sync_defmap(struct cbq_class *cl)
1054{
1055 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1056 struct cbq_class *split = cl->split;
cc7ec456 1057 unsigned int h;
1da177e4
LT
1058 int i;
1059
1060 if (split == NULL)
1061 return;
1062
cc7ec456
ED
1063 for (i = 0; i <= TC_PRIO_MAX; i++) {
1064 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1da177e4
LT
1065 split->defaults[i] = NULL;
1066 }
1067
cc7ec456 1068 for (i = 0; i <= TC_PRIO_MAX; i++) {
1da177e4
LT
1069 int level = split->level;
1070
1071 if (split->defaults[i])
1072 continue;
1073
d77fea2e 1074 for (h = 0; h < q->clhash.hashsize; h++) {
1da177e4
LT
1075 struct cbq_class *c;
1076
b67bfe0d 1077 hlist_for_each_entry(c, &q->clhash.hash[h],
d77fea2e 1078 common.hnode) {
1da177e4 1079 if (c->split == split && c->level < level &&
cc7ec456 1080 c->defmap & (1<<i)) {
1da177e4
LT
1081 split->defaults[i] = c;
1082 level = c->level;
1083 }
1084 }
1085 }
1086 }
1087}
1088
1089static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1090{
1091 struct cbq_class *split = NULL;
1092
1093 if (splitid == 0) {
cc7ec456
ED
1094 split = cl->split;
1095 if (!split)
1da177e4 1096 return;
d77fea2e 1097 splitid = split->common.classid;
1da177e4
LT
1098 }
1099
d77fea2e 1100 if (split == NULL || split->common.classid != splitid) {
1da177e4 1101 for (split = cl->tparent; split; split = split->tparent)
d77fea2e 1102 if (split->common.classid == splitid)
1da177e4
LT
1103 break;
1104 }
1105
1106 if (split == NULL)
1107 return;
1108
1109 if (cl->split != split) {
1110 cl->defmap = 0;
1111 cbq_sync_defmap(cl);
1112 cl->split = split;
cc7ec456 1113 cl->defmap = def & mask;
1da177e4 1114 } else
cc7ec456 1115 cl->defmap = (cl->defmap & ~mask) | (def & mask);
1da177e4
LT
1116
1117 cbq_sync_defmap(cl);
1118}
1119
1120static void cbq_unlink_class(struct cbq_class *this)
1121{
1122 struct cbq_class *cl, **clp;
1123 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1124
d77fea2e 1125 qdisc_class_hash_remove(&q->clhash, &this->common);
1da177e4
LT
1126
1127 if (this->tparent) {
cc7ec456 1128 clp = &this->sibling;
1da177e4
LT
1129 cl = *clp;
1130 do {
1131 if (cl == this) {
1132 *clp = cl->sibling;
1133 break;
1134 }
1135 clp = &cl->sibling;
1136 } while ((cl = *clp) != this->sibling);
1137
1138 if (this->tparent->children == this) {
1139 this->tparent->children = this->sibling;
1140 if (this->sibling == this)
1141 this->tparent->children = NULL;
1142 }
1143 } else {
547b792c 1144 WARN_ON(this->sibling != this);
1da177e4
LT
1145 }
1146}
1147
1148static void cbq_link_class(struct cbq_class *this)
1149{
1150 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1da177e4
LT
1151 struct cbq_class *parent = this->tparent;
1152
1153 this->sibling = this;
d77fea2e 1154 qdisc_class_hash_insert(&q->clhash, &this->common);
1da177e4
LT
1155
1156 if (parent == NULL)
1157 return;
1158
1159 if (parent->children == NULL) {
1160 parent->children = this;
1161 } else {
1162 this->sibling = parent->children->sibling;
1163 parent->children->sibling = this;
1164 }
1165}
1166
cc7ec456 1167static unsigned int cbq_drop(struct Qdisc *sch)
1da177e4
LT
1168{
1169 struct cbq_sched_data *q = qdisc_priv(sch);
1170 struct cbq_class *cl, *cl_head;
1171 int prio;
1172 unsigned int len;
1173
1174 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
cc7ec456
ED
1175 cl_head = q->active[prio];
1176 if (!cl_head)
1da177e4
LT
1177 continue;
1178
1179 cl = cl_head;
1180 do {
1181 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1182 sch->q.qlen--;
a37ef2e3
JP
1183 if (!cl->q->q.qlen)
1184 cbq_deactivate_class(cl);
1da177e4
LT
1185 return len;
1186 }
1187 } while ((cl = cl->next_alive) != cl_head);
1188 }
1189 return 0;
1190}
1191
1192static void
cc7ec456 1193cbq_reset(struct Qdisc *sch)
1da177e4
LT
1194{
1195 struct cbq_sched_data *q = qdisc_priv(sch);
1196 struct cbq_class *cl;
1197 int prio;
cc7ec456 1198 unsigned int h;
1da177e4
LT
1199
1200 q->activemask = 0;
1201 q->pmask = 0;
1202 q->tx_class = NULL;
1203 q->tx_borrowed = NULL;
88a99354 1204 qdisc_watchdog_cancel(&q->watchdog);
2fbd3da3 1205 hrtimer_cancel(&q->delay_timer);
1da177e4 1206 q->toplevel = TC_CBQ_MAXLEVEL;
3bebcda2 1207 q->now = psched_get_time();
1da177e4
LT
1208
1209 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1210 q->active[prio] = NULL;
1211
d77fea2e 1212 for (h = 0; h < q->clhash.hashsize; h++) {
b67bfe0d 1213 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1da177e4
LT
1214 qdisc_reset(cl->q);
1215
1216 cl->next_alive = NULL;
a084980d 1217 cl->undertime = PSCHED_PASTPERFECT;
1da177e4
LT
1218 cl->avgidle = cl->maxidle;
1219 cl->deficit = cl->quantum;
1220 cl->cpriority = cl->priority;
1221 }
1222 }
1223 sch->q.qlen = 0;
1224}
1225
1226
1227static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1228{
cc7ec456
ED
1229 if (lss->change & TCF_CBQ_LSS_FLAGS) {
1230 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1231 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1da177e4 1232 }
cc7ec456 1233 if (lss->change & TCF_CBQ_LSS_EWMA)
1da177e4 1234 cl->ewma_log = lss->ewma_log;
cc7ec456 1235 if (lss->change & TCF_CBQ_LSS_AVPKT)
1da177e4 1236 cl->avpkt = lss->avpkt;
cc7ec456 1237 if (lss->change & TCF_CBQ_LSS_MINIDLE)
1da177e4 1238 cl->minidle = -(long)lss->minidle;
cc7ec456 1239 if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1da177e4
LT
1240 cl->maxidle = lss->maxidle;
1241 cl->avgidle = lss->maxidle;
1242 }
cc7ec456 1243 if (lss->change & TCF_CBQ_LSS_OFFTIME)
1da177e4
LT
1244 cl->offtime = lss->offtime;
1245 return 0;
1246}
1247
1248static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1249{
1250 q->nclasses[cl->priority]--;
1251 q->quanta[cl->priority] -= cl->weight;
1252 cbq_normalize_quanta(q, cl->priority);
1253}
1254
1255static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1256{
1257 q->nclasses[cl->priority]++;
1258 q->quanta[cl->priority] += cl->weight;
1259 cbq_normalize_quanta(q, cl->priority);
1260}
1261
1262static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1263{
1264 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1265
1266 if (wrr->allot)
1267 cl->allot = wrr->allot;
1268 if (wrr->weight)
1269 cl->weight = wrr->weight;
1270 if (wrr->priority) {
cc7ec456 1271 cl->priority = wrr->priority - 1;
1da177e4
LT
1272 cl->cpriority = cl->priority;
1273 if (cl->priority >= cl->priority2)
cc7ec456 1274 cl->priority2 = TC_CBQ_MAXPRIO - 1;
1da177e4
LT
1275 }
1276
1277 cbq_addprio(q, cl);
1278 return 0;
1279}
1280
1281static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1282{
1283 switch (ovl->strategy) {
1284 case TC_CBQ_OVL_CLASSIC:
1285 cl->overlimit = cbq_ovl_classic;
1286 break;
1287 case TC_CBQ_OVL_DELAY:
1288 cl->overlimit = cbq_ovl_delay;
1289 break;
1290 case TC_CBQ_OVL_LOWPRIO:
cc7ec456
ED
1291 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1292 ovl->priority2 - 1 <= cl->priority)
1da177e4 1293 return -EINVAL;
cc7ec456 1294 cl->priority2 = ovl->priority2 - 1;
1da177e4
LT
1295 cl->overlimit = cbq_ovl_lowprio;
1296 break;
1297 case TC_CBQ_OVL_DROP:
1298 cl->overlimit = cbq_ovl_drop;
1299 break;
1300 case TC_CBQ_OVL_RCLASSIC:
1301 cl->overlimit = cbq_ovl_rclassic;
1302 break;
1303 default:
1304 return -EINVAL;
1305 }
1a13cb63 1306 cl->penalty = ovl->penalty;
1da177e4
LT
1307 return 0;
1308}
1309
c3bc7cff 1310#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1311static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1312{
1313 cl->police = p->police;
1314
1315 if (cl->q->handle) {
1316 if (p->police == TC_POLICE_RECLASSIFY)
1317 cl->q->reshape_fail = cbq_reshape_fail;
1318 else
1319 cl->q->reshape_fail = NULL;
1320 }
1321 return 0;
1322}
1323#endif
1324
1325static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1326{
1327 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1328 return 0;
1329}
1330
27a3421e
PM
1331static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1332 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1333 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1334 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1335 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1336 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1337 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1338 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1339};
1340
1e90474c 1341static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1da177e4
LT
1342{
1343 struct cbq_sched_data *q = qdisc_priv(sch);
1e90474c 1344 struct nlattr *tb[TCA_CBQ_MAX + 1];
1da177e4 1345 struct tc_ratespec *r;
cee63723
PM
1346 int err;
1347
27a3421e 1348 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
cee63723
PM
1349 if (err < 0)
1350 return err;
1da177e4 1351
27a3421e 1352 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1da177e4
LT
1353 return -EINVAL;
1354
1e90474c 1355 r = nla_data(tb[TCA_CBQ_RATE]);
1da177e4 1356
1e90474c 1357 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1da177e4
LT
1358 return -EINVAL;
1359
d77fea2e
PM
1360 err = qdisc_class_hash_init(&q->clhash);
1361 if (err < 0)
1362 goto put_rtab;
1363
1da177e4
LT
1364 q->link.refcnt = 1;
1365 q->link.sibling = &q->link;
d77fea2e 1366 q->link.common.classid = sch->handle;
1da177e4 1367 q->link.qdisc = sch;
3511c913
CG
1368 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1369 sch->handle);
1370 if (!q->link.q)
1da177e4
LT
1371 q->link.q = &noop_qdisc;
1372
cc7ec456
ED
1373 q->link.priority = TC_CBQ_MAXPRIO - 1;
1374 q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1375 q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1da177e4
LT
1376 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1377 q->link.overlimit = cbq_ovl_classic;
5ce2d488 1378 q->link.allot = psched_mtu(qdisc_dev(sch));
1da177e4
LT
1379 q->link.quantum = q->link.allot;
1380 q->link.weight = q->link.R_tab->rate.rate;
1381
1382 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1383 q->link.avpkt = q->link.allot/2;
1384 q->link.minidle = -0x7FFFFFFF;
1da177e4 1385
88a99354 1386 qdisc_watchdog_init(&q->watchdog, sch);
2fbd3da3 1387 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1da177e4
LT
1388 q->delay_timer.function = cbq_undelay;
1389 q->toplevel = TC_CBQ_MAXLEVEL;
3bebcda2 1390 q->now = psched_get_time();
1da177e4
LT
1391
1392 cbq_link_class(&q->link);
1393
1e90474c
PM
1394 if (tb[TCA_CBQ_LSSOPT])
1395 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1da177e4
LT
1396
1397 cbq_addprio(q, &q->link);
1398 return 0;
d77fea2e
PM
1399
1400put_rtab:
1401 qdisc_put_rtab(q->link.R_tab);
1402 return err;
1da177e4
LT
1403}
1404
cc7ec456 1405static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1406{
27a884dc 1407 unsigned char *b = skb_tail_pointer(skb);
1da177e4 1408
1b34ec43
DM
1409 if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
1410 goto nla_put_failure;
1da177e4
LT
1411 return skb->len;
1412
1e90474c 1413nla_put_failure:
dc5fc579 1414 nlmsg_trim(skb, b);
1da177e4
LT
1415 return -1;
1416}
1417
cc7ec456 1418static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1419{
27a884dc 1420 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1421 struct tc_cbq_lssopt opt;
1422
1423 opt.flags = 0;
1424 if (cl->borrow == NULL)
1425 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1426 if (cl->share == NULL)
1427 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1428 opt.ewma_log = cl->ewma_log;
1429 opt.level = cl->level;
1430 opt.avpkt = cl->avpkt;
1431 opt.maxidle = cl->maxidle;
1432 opt.minidle = (u32)(-cl->minidle);
1433 opt.offtime = cl->offtime;
1434 opt.change = ~0;
1b34ec43
DM
1435 if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
1436 goto nla_put_failure;
1da177e4
LT
1437 return skb->len;
1438
1e90474c 1439nla_put_failure:
dc5fc579 1440 nlmsg_trim(skb, b);
1da177e4
LT
1441 return -1;
1442}
1443
cc7ec456 1444static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1445{
27a884dc 1446 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1447 struct tc_cbq_wrropt opt;
1448
a0db856a 1449 memset(&opt, 0, sizeof(opt));
1da177e4
LT
1450 opt.flags = 0;
1451 opt.allot = cl->allot;
cc7ec456
ED
1452 opt.priority = cl->priority + 1;
1453 opt.cpriority = cl->cpriority + 1;
1da177e4 1454 opt.weight = cl->weight;
1b34ec43
DM
1455 if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
1456 goto nla_put_failure;
1da177e4
LT
1457 return skb->len;
1458
1e90474c 1459nla_put_failure:
dc5fc579 1460 nlmsg_trim(skb, b);
1da177e4
LT
1461 return -1;
1462}
1463
cc7ec456 1464static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1465{
27a884dc 1466 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1467 struct tc_cbq_ovl opt;
1468
1469 opt.strategy = cl->ovl_strategy;
cc7ec456 1470 opt.priority2 = cl->priority2 + 1;
8a47077a 1471 opt.pad = 0;
1a13cb63 1472 opt.penalty = cl->penalty;
1b34ec43
DM
1473 if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
1474 goto nla_put_failure;
1da177e4
LT
1475 return skb->len;
1476
1e90474c 1477nla_put_failure:
dc5fc579 1478 nlmsg_trim(skb, b);
1da177e4
LT
1479 return -1;
1480}
1481
cc7ec456 1482static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1483{
27a884dc 1484 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1485 struct tc_cbq_fopt opt;
1486
1487 if (cl->split || cl->defmap) {
d77fea2e 1488 opt.split = cl->split ? cl->split->common.classid : 0;
1da177e4
LT
1489 opt.defmap = cl->defmap;
1490 opt.defchange = ~0;
1b34ec43
DM
1491 if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
1492 goto nla_put_failure;
1da177e4
LT
1493 }
1494 return skb->len;
1495
1e90474c 1496nla_put_failure:
dc5fc579 1497 nlmsg_trim(skb, b);
1da177e4
LT
1498 return -1;
1499}
1500
c3bc7cff 1501#ifdef CONFIG_NET_CLS_ACT
cc7ec456 1502static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1da177e4 1503{
27a884dc 1504 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1505 struct tc_cbq_police opt;
1506
1507 if (cl->police) {
1508 opt.police = cl->police;
9ef1d4c7
PM
1509 opt.__res1 = 0;
1510 opt.__res2 = 0;
1b34ec43
DM
1511 if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
1512 goto nla_put_failure;
1da177e4
LT
1513 }
1514 return skb->len;
1515
1e90474c 1516nla_put_failure:
dc5fc579 1517 nlmsg_trim(skb, b);
1da177e4
LT
1518 return -1;
1519}
1520#endif
1521
1522static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1523{
1524 if (cbq_dump_lss(skb, cl) < 0 ||
1525 cbq_dump_rate(skb, cl) < 0 ||
1526 cbq_dump_wrr(skb, cl) < 0 ||
1527 cbq_dump_ovl(skb, cl) < 0 ||
c3bc7cff 1528#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1529 cbq_dump_police(skb, cl) < 0 ||
1530#endif
1531 cbq_dump_fopt(skb, cl) < 0)
1532 return -1;
1533 return 0;
1534}
1535
1536static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1537{
1538 struct cbq_sched_data *q = qdisc_priv(sch);
4b3550ef 1539 struct nlattr *nest;
1da177e4 1540
4b3550ef
PM
1541 nest = nla_nest_start(skb, TCA_OPTIONS);
1542 if (nest == NULL)
1543 goto nla_put_failure;
1da177e4 1544 if (cbq_dump_attr(skb, &q->link) < 0)
1e90474c 1545 goto nla_put_failure;
d59b7d80 1546 return nla_nest_end(skb, nest);
1da177e4 1547
1e90474c 1548nla_put_failure:
4b3550ef 1549 nla_nest_cancel(skb, nest);
1da177e4
LT
1550 return -1;
1551}
1552
1553static int
1554cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1555{
1556 struct cbq_sched_data *q = qdisc_priv(sch);
1557
1558 q->link.xstats.avgidle = q->link.avgidle;
1559 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1560}
1561
1562static int
1563cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1564 struct sk_buff *skb, struct tcmsg *tcm)
1565{
cc7ec456 1566 struct cbq_class *cl = (struct cbq_class *)arg;
4b3550ef 1567 struct nlattr *nest;
1da177e4
LT
1568
1569 if (cl->tparent)
d77fea2e 1570 tcm->tcm_parent = cl->tparent->common.classid;
1da177e4
LT
1571 else
1572 tcm->tcm_parent = TC_H_ROOT;
d77fea2e 1573 tcm->tcm_handle = cl->common.classid;
1da177e4
LT
1574 tcm->tcm_info = cl->q->handle;
1575
4b3550ef
PM
1576 nest = nla_nest_start(skb, TCA_OPTIONS);
1577 if (nest == NULL)
1578 goto nla_put_failure;
1da177e4 1579 if (cbq_dump_attr(skb, cl) < 0)
1e90474c 1580 goto nla_put_failure;
d59b7d80 1581 return nla_nest_end(skb, nest);
1da177e4 1582
1e90474c 1583nla_put_failure:
4b3550ef 1584 nla_nest_cancel(skb, nest);
1da177e4
LT
1585 return -1;
1586}
1587
1588static int
1589cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1590 struct gnet_dump *d)
1591{
1592 struct cbq_sched_data *q = qdisc_priv(sch);
cc7ec456 1593 struct cbq_class *cl = (struct cbq_class *)arg;
1da177e4
LT
1594
1595 cl->qstats.qlen = cl->q->q.qlen;
1596 cl->xstats.avgidle = cl->avgidle;
1597 cl->xstats.undertime = 0;
1598
a084980d 1599 if (cl->undertime != PSCHED_PASTPERFECT)
8edc0c31 1600 cl->xstats.undertime = cl->undertime - q->now;
1da177e4
LT
1601
1602 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
d250a5f9 1603 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1da177e4
LT
1604 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1605 return -1;
1606
1607 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1608}
1609
1610static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1611 struct Qdisc **old)
1612{
cc7ec456 1613 struct cbq_class *cl = (struct cbq_class *)arg;
1da177e4 1614
5b9a9ccf 1615 if (new == NULL) {
3511c913 1616 new = qdisc_create_dflt(sch->dev_queue,
5b9a9ccf
PM
1617 &pfifo_qdisc_ops, cl->common.classid);
1618 if (new == NULL)
1619 return -ENOBUFS;
1620 } else {
c3bc7cff 1621#ifdef CONFIG_NET_CLS_ACT
5b9a9ccf
PM
1622 if (cl->police == TC_POLICE_RECLASSIFY)
1623 new->reshape_fail = cbq_reshape_fail;
1da177e4 1624#endif
1da177e4 1625 }
5b9a9ccf
PM
1626 sch_tree_lock(sch);
1627 *old = cl->q;
1628 cl->q = new;
1629 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1630 qdisc_reset(*old);
1631 sch_tree_unlock(sch);
1632
1633 return 0;
1da177e4
LT
1634}
1635
cc7ec456 1636static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1da177e4 1637{
cc7ec456 1638 struct cbq_class *cl = (struct cbq_class *)arg;
1da177e4 1639
5b9a9ccf 1640 return cl->q;
1da177e4
LT
1641}
1642
a37ef2e3
JP
1643static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1644{
1645 struct cbq_class *cl = (struct cbq_class *)arg;
1646
1647 if (cl->q->q.qlen == 0)
1648 cbq_deactivate_class(cl);
1649}
1650
1da177e4
LT
1651static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1652{
1653 struct cbq_sched_data *q = qdisc_priv(sch);
1654 struct cbq_class *cl = cbq_class_lookup(q, classid);
1655
1656 if (cl) {
1657 cl->refcnt++;
1658 return (unsigned long)cl;
1659 }
1660 return 0;
1661}
1662
1da177e4
LT
1663static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1664{
1665 struct cbq_sched_data *q = qdisc_priv(sch);
1666
547b792c 1667 WARN_ON(cl->filters);
1da177e4 1668
ff31ab56 1669 tcf_destroy_chain(&cl->filter_list);
1da177e4
LT
1670 qdisc_destroy(cl->q);
1671 qdisc_put_rtab(cl->R_tab);
1da177e4 1672 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1da177e4
LT
1673 if (cl != &q->link)
1674 kfree(cl);
1675}
1676
cc7ec456 1677static void cbq_destroy(struct Qdisc *sch)
1da177e4
LT
1678{
1679 struct cbq_sched_data *q = qdisc_priv(sch);
b67bfe0d 1680 struct hlist_node *next;
1da177e4 1681 struct cbq_class *cl;
cc7ec456 1682 unsigned int h;
1da177e4 1683
c3bc7cff 1684#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1685 q->rx_class = NULL;
1686#endif
1687 /*
1688 * Filters must be destroyed first because we don't destroy the
1689 * classes from root to leafs which means that filters can still
1690 * be bound to classes which have been destroyed already. --TGR '04
1691 */
d77fea2e 1692 for (h = 0; h < q->clhash.hashsize; h++) {
b67bfe0d 1693 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
ff31ab56 1694 tcf_destroy_chain(&cl->filter_list);
b00b4bf9 1695 }
d77fea2e 1696 for (h = 0; h < q->clhash.hashsize; h++) {
b67bfe0d 1697 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
d77fea2e 1698 common.hnode)
1da177e4 1699 cbq_destroy_class(sch, cl);
1da177e4 1700 }
d77fea2e 1701 qdisc_class_hash_destroy(&q->clhash);
1da177e4
LT
1702}
1703
1704static void cbq_put(struct Qdisc *sch, unsigned long arg)
1705{
cc7ec456 1706 struct cbq_class *cl = (struct cbq_class *)arg;
1da177e4
LT
1707
1708 if (--cl->refcnt == 0) {
c3bc7cff 1709#ifdef CONFIG_NET_CLS_ACT
102396ae 1710 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1da177e4
LT
1711 struct cbq_sched_data *q = qdisc_priv(sch);
1712
7698b4fc 1713 spin_lock_bh(root_lock);
1da177e4
LT
1714 if (q->rx_class == cl)
1715 q->rx_class = NULL;
7698b4fc 1716 spin_unlock_bh(root_lock);
1da177e4
LT
1717#endif
1718
1719 cbq_destroy_class(sch, cl);
1720 }
1721}
1722
1723static int
1e90474c 1724cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1da177e4
LT
1725 unsigned long *arg)
1726{
1727 int err;
1728 struct cbq_sched_data *q = qdisc_priv(sch);
cc7ec456 1729 struct cbq_class *cl = (struct cbq_class *)*arg;
1e90474c
PM
1730 struct nlattr *opt = tca[TCA_OPTIONS];
1731 struct nlattr *tb[TCA_CBQ_MAX + 1];
1da177e4
LT
1732 struct cbq_class *parent;
1733 struct qdisc_rate_table *rtab = NULL;
1734
cee63723 1735 if (opt == NULL)
1da177e4
LT
1736 return -EINVAL;
1737
27a3421e 1738 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
cee63723
PM
1739 if (err < 0)
1740 return err;
1741
1da177e4
LT
1742 if (cl) {
1743 /* Check parent */
1744 if (parentid) {
d77fea2e
PM
1745 if (cl->tparent &&
1746 cl->tparent->common.classid != parentid)
1da177e4
LT
1747 return -EINVAL;
1748 if (!cl->tparent && parentid != TC_H_ROOT)
1749 return -EINVAL;
1750 }
1751
1e90474c 1752 if (tb[TCA_CBQ_RATE]) {
71bcb09a
SH
1753 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1754 tb[TCA_CBQ_RTAB]);
1da177e4
LT
1755 if (rtab == NULL)
1756 return -EINVAL;
1757 }
1758
71bcb09a
SH
1759 if (tca[TCA_RATE]) {
1760 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1761 qdisc_root_sleeping_lock(sch),
1762 tca[TCA_RATE]);
1763 if (err) {
79c11f2e 1764 qdisc_put_rtab(rtab);
71bcb09a
SH
1765 return err;
1766 }
1767 }
1768
1da177e4
LT
1769 /* Change class parameters */
1770 sch_tree_lock(sch);
1771
1772 if (cl->next_alive != NULL)
1773 cbq_deactivate_class(cl);
1774
1775 if (rtab) {
b94c8afc
PM
1776 qdisc_put_rtab(cl->R_tab);
1777 cl->R_tab = rtab;
1da177e4
LT
1778 }
1779
1e90474c
PM
1780 if (tb[TCA_CBQ_LSSOPT])
1781 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1da177e4 1782
1e90474c 1783 if (tb[TCA_CBQ_WRROPT]) {
1da177e4 1784 cbq_rmprio(q, cl);
1e90474c 1785 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1da177e4
LT
1786 }
1787
1e90474c
PM
1788 if (tb[TCA_CBQ_OVL_STRATEGY])
1789 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1da177e4 1790
c3bc7cff 1791#ifdef CONFIG_NET_CLS_ACT
1e90474c
PM
1792 if (tb[TCA_CBQ_POLICE])
1793 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1da177e4
LT
1794#endif
1795
1e90474c
PM
1796 if (tb[TCA_CBQ_FOPT])
1797 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1da177e4
LT
1798
1799 if (cl->q->q.qlen)
1800 cbq_activate_class(cl);
1801
1802 sch_tree_unlock(sch);
1803
1da177e4
LT
1804 return 0;
1805 }
1806
1807 if (parentid == TC_H_ROOT)
1808 return -EINVAL;
1809
1e90474c
PM
1810 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1811 tb[TCA_CBQ_LSSOPT] == NULL)
1da177e4
LT
1812 return -EINVAL;
1813
1e90474c 1814 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1da177e4
LT
1815 if (rtab == NULL)
1816 return -EINVAL;
1817
1818 if (classid) {
1819 err = -EINVAL;
cc7ec456
ED
1820 if (TC_H_MAJ(classid ^ sch->handle) ||
1821 cbq_class_lookup(q, classid))
1da177e4
LT
1822 goto failure;
1823 } else {
1824 int i;
cc7ec456 1825 classid = TC_H_MAKE(sch->handle, 0x8000);
1da177e4 1826
cc7ec456 1827 for (i = 0; i < 0x8000; i++) {
1da177e4
LT
1828 if (++q->hgenerator >= 0x8000)
1829 q->hgenerator = 1;
1830 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1831 break;
1832 }
1833 err = -ENOSR;
1834 if (i >= 0x8000)
1835 goto failure;
1836 classid = classid|q->hgenerator;
1837 }
1838
1839 parent = &q->link;
1840 if (parentid) {
1841 parent = cbq_class_lookup(q, parentid);
1842 err = -EINVAL;
1843 if (parent == NULL)
1844 goto failure;
1845 }
1846
1847 err = -ENOBUFS;
0da974f4 1848 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1da177e4
LT
1849 if (cl == NULL)
1850 goto failure;
71bcb09a
SH
1851
1852 if (tca[TCA_RATE]) {
1853 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1854 qdisc_root_sleeping_lock(sch),
1855 tca[TCA_RATE]);
1856 if (err) {
1857 kfree(cl);
1858 goto failure;
1859 }
1860 }
1861
1da177e4
LT
1862 cl->R_tab = rtab;
1863 rtab = NULL;
1864 cl->refcnt = 1;
3511c913
CG
1865 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1866 if (!cl->q)
1da177e4 1867 cl->q = &noop_qdisc;
d77fea2e 1868 cl->common.classid = classid;
1da177e4
LT
1869 cl->tparent = parent;
1870 cl->qdisc = sch;
1871 cl->allot = parent->allot;
1872 cl->quantum = cl->allot;
1873 cl->weight = cl->R_tab->rate.rate;
1da177e4
LT
1874
1875 sch_tree_lock(sch);
1876 cbq_link_class(cl);
1877 cl->borrow = cl->tparent;
1878 if (cl->tparent != &q->link)
1879 cl->share = cl->tparent;
1880 cbq_adjust_levels(parent);
1881 cl->minidle = -0x7FFFFFFF;
1e90474c
PM
1882 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1883 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
cc7ec456 1884 if (cl->ewma_log == 0)
1da177e4 1885 cl->ewma_log = q->link.ewma_log;
cc7ec456 1886 if (cl->maxidle == 0)
1da177e4 1887 cl->maxidle = q->link.maxidle;
cc7ec456 1888 if (cl->avpkt == 0)
1da177e4
LT
1889 cl->avpkt = q->link.avpkt;
1890 cl->overlimit = cbq_ovl_classic;
1e90474c
PM
1891 if (tb[TCA_CBQ_OVL_STRATEGY])
1892 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
c3bc7cff 1893#ifdef CONFIG_NET_CLS_ACT
1e90474c
PM
1894 if (tb[TCA_CBQ_POLICE])
1895 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1da177e4 1896#endif
1e90474c
PM
1897 if (tb[TCA_CBQ_FOPT])
1898 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1da177e4
LT
1899 sch_tree_unlock(sch);
1900
d77fea2e
PM
1901 qdisc_class_hash_grow(sch, &q->clhash);
1902
1da177e4
LT
1903 *arg = (unsigned long)cl;
1904 return 0;
1905
1906failure:
1907 qdisc_put_rtab(rtab);
1908 return err;
1909}
1910
1911static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1912{
1913 struct cbq_sched_data *q = qdisc_priv(sch);
cc7ec456 1914 struct cbq_class *cl = (struct cbq_class *)arg;
a37ef2e3 1915 unsigned int qlen;
1da177e4
LT
1916
1917 if (cl->filters || cl->children || cl == &q->link)
1918 return -EBUSY;
1919
1920 sch_tree_lock(sch);
1921
a37ef2e3
JP
1922 qlen = cl->q->q.qlen;
1923 qdisc_reset(cl->q);
1924 qdisc_tree_decrease_qlen(cl->q, qlen);
1925
1da177e4
LT
1926 if (cl->next_alive)
1927 cbq_deactivate_class(cl);
1928
1929 if (q->tx_borrowed == cl)
1930 q->tx_borrowed = q->tx_class;
1931 if (q->tx_class == cl) {
1932 q->tx_class = NULL;
1933 q->tx_borrowed = NULL;
1934 }
c3bc7cff 1935#ifdef CONFIG_NET_CLS_ACT
1da177e4
LT
1936 if (q->rx_class == cl)
1937 q->rx_class = NULL;
1938#endif
1939
1940 cbq_unlink_class(cl);
1941 cbq_adjust_levels(cl->tparent);
1942 cl->defmap = 0;
1943 cbq_sync_defmap(cl);
1944
1945 cbq_rmprio(q, cl);
1946 sch_tree_unlock(sch);
1947
7cd0a638
JP
1948 BUG_ON(--cl->refcnt == 0);
1949 /*
1950 * This shouldn't happen: we "hold" one cops->get() when called
1951 * from tc_ctl_tclass; the destroy method is done from cops->put().
1952 */
1da177e4
LT
1953
1954 return 0;
1955}
1956
1957static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1958{
1959 struct cbq_sched_data *q = qdisc_priv(sch);
1960 struct cbq_class *cl = (struct cbq_class *)arg;
1961
1962 if (cl == NULL)
1963 cl = &q->link;
1964
1965 return &cl->filter_list;
1966}
1967
1968static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1969 u32 classid)
1970{
1971 struct cbq_sched_data *q = qdisc_priv(sch);
cc7ec456 1972 struct cbq_class *p = (struct cbq_class *)parent;
1da177e4
LT
1973 struct cbq_class *cl = cbq_class_lookup(q, classid);
1974
1975 if (cl) {
1976 if (p && p->level <= cl->level)
1977 return 0;
1978 cl->filters++;
1979 return (unsigned long)cl;
1980 }
1981 return 0;
1982}
1983
1984static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1985{
cc7ec456 1986 struct cbq_class *cl = (struct cbq_class *)arg;
1da177e4
LT
1987
1988 cl->filters--;
1989}
1990
1991static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1992{
1993 struct cbq_sched_data *q = qdisc_priv(sch);
d77fea2e 1994 struct cbq_class *cl;
cc7ec456 1995 unsigned int h;
1da177e4
LT
1996
1997 if (arg->stop)
1998 return;
1999
d77fea2e 2000 for (h = 0; h < q->clhash.hashsize; h++) {
b67bfe0d 2001 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1da177e4
LT
2002 if (arg->count < arg->skip) {
2003 arg->count++;
2004 continue;
2005 }
2006 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2007 arg->stop = 1;
2008 return;
2009 }
2010 arg->count++;
2011 }
2012 }
2013}
2014
20fea08b 2015static const struct Qdisc_class_ops cbq_class_ops = {
1da177e4
LT
2016 .graft = cbq_graft,
2017 .leaf = cbq_leaf,
a37ef2e3 2018 .qlen_notify = cbq_qlen_notify,
1da177e4
LT
2019 .get = cbq_get,
2020 .put = cbq_put,
2021 .change = cbq_change_class,
2022 .delete = cbq_delete,
2023 .walk = cbq_walk,
2024 .tcf_chain = cbq_find_tcf,
2025 .bind_tcf = cbq_bind_filter,
2026 .unbind_tcf = cbq_unbind_filter,
2027 .dump = cbq_dump_class,
2028 .dump_stats = cbq_dump_class_stats,
2029};
2030
20fea08b 2031static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
1da177e4
LT
2032 .next = NULL,
2033 .cl_ops = &cbq_class_ops,
2034 .id = "cbq",
2035 .priv_size = sizeof(struct cbq_sched_data),
2036 .enqueue = cbq_enqueue,
2037 .dequeue = cbq_dequeue,
77be155c 2038 .peek = qdisc_peek_dequeued,
1da177e4
LT
2039 .drop = cbq_drop,
2040 .init = cbq_init,
2041 .reset = cbq_reset,
2042 .destroy = cbq_destroy,
2043 .change = NULL,
2044 .dump = cbq_dump,
2045 .dump_stats = cbq_dump_stats,
2046 .owner = THIS_MODULE,
2047};
2048
2049static int __init cbq_module_init(void)
2050{
2051 return register_qdisc(&cbq_qdisc_ops);
2052}
10297b99 2053static void __exit cbq_module_exit(void)
1da177e4
LT
2054{
2055 unregister_qdisc(&cbq_qdisc_ops);
2056}
2057module_init(cbq_module_init)
2058module_exit(cbq_module_exit)
2059MODULE_LICENSE("GPL");