1 /*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
21
22
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
25
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
29
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
31
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
33 Parameters", 1996
34
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
37
38 -----------------------------------------------------------------------
39
40 The algorithm skeleton was taken from the NS simulator's cbq.cc.
41 If someone wants to check this code against the LBL version,
42 they should take into account that ONLY the skeleton was borrowed;
43 the implementation is different. In particular:
44
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than the MTU, which is always the case when real-time classes
48 have small rates. Note that the statement of [3] is
49 incomplete: the delay may actually be estimated even if the class's
50 per-round allotment is less than the MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
52
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
54
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
57
58
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me why I am wrong 8).
63
64 --- Linux has no EOI event, so we cannot estimate the true class
65 idle time. The workaround is to treat the next dequeue event
66 as a sign that the previous packet has finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it holds.
68 Moreover, combined with the clock integrator, this scheme looks
69 very close to an ideal solution. */
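
/* Worked example of the delay bound above (illustrative numbers, not from
   the original text): take MTU = 1500 bytes, k = 2 classes with
   r_1 = r_2 = 0.25 (so r = 0.5), and W chosen so that the per-round
   allotment W*r_i = 500 bytes, i.e. W = 2000 bytes and W*r = 1000 bytes.
   Then

	delay_1 <= ([1500/500]*1000 + 1000 + 2*1500)/B = 7000 bytes / B,

   which at B = 10 Mbit/s (1.25e6 bytes/s) is about 5.6 ms, even though the
   per-round allotment (500 bytes) is below the MTU. */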
70
71 struct cbq_sched_data;
72
73
74 struct cbq_class
75 {
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
78
79 /* Parameters */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
85 unsigned char police;
86 #endif
87
88 u32 defmap;
89
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
92 long offtime;
93 long minidle;
94 u32 avpkt;
95 struct qdisc_rate_table *R_tab;
96
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
100
101 /* General scheduler (WRR) parameters */
102 long allot;
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
105
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
111 parent otherwise */
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
114
115 struct Qdisc *q; /* Elementary queueing discipline */
116
117
118 /* Variables */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
124 */
125
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
128 long avgidle;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic_packed bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
135
136 struct tcf_proto *filter_list;
137
138 int refcnt;
139 int filters;
140
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
142 };
143
144 struct cbq_sched_data
145 {
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
149
150 struct cbq_class link;
151
152 unsigned activemask;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
154 with backlog */
155
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
158 #endif
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
161 int tx_len;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
164 unsigned pmask;
165
166 struct tasklet_hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
168 started when CBQ has
169 backlog, but cannot
170 transmit just now */
171 psched_tdiff_t wd_expires;
172 int toplevel;
173 u32 hgenerator;
174 };
175
176
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
178
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
181 {
182 struct Qdisc_class_common *clc;
183
184 clc = qdisc_class_find(&q->clhash, classid);
185 if (clc == NULL)
186 return NULL;
187 return container_of(clc, struct cbq_class, common);
188 }
189
190 #ifdef CONFIG_NET_CLS_ACT
191
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
194 {
195 struct cbq_class *cl, *new;
196
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
199 return new;
200
201 return NULL;
202 }
203
204 #endif
205
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
208 transparently.
209
210 Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
211 so that classification resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
213 to the split node.
214 */
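
/* A small illustrative walk-through (hypothetical setup, derived from the
   code below): suppose a filter attached to the root selects an inner
   (split) class. Because that class has level > 0, the loop below restarts
   with its own filter_list. If no filter there matches, the packet falls
   back to the split node's defaults[] slot for its TC_PRIO value (built
   from the classes' defmaps), and finally to defaults[TC_PRIO_BESTEFFORT]. */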
215
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
218 {
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
225
226 /*
227 * Step 1. If skb->priority points to one of our classes, use it.
228 */
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
231 return cl;
232
233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
234 for (;;) {
235 int result = 0;
236 defmap = head->defaults;
237
238 /*
239 * Step 2+n. Apply classifier.
240 */
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
243 goto fallback;
244
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
250
251 if (cl == NULL || cl->level >= head->level)
252 goto fallback;
253 }
254
255 #ifdef CONFIG_NET_CLS_ACT
256 switch (result) {
257 case TC_ACT_QUEUED:
258 case TC_ACT_STOLEN:
259 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
260 case TC_ACT_SHOT:
261 return NULL;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
264 }
265 #endif
266 if (cl->level == 0)
267 return cl;
268
269 /*
270 * Step 3+n. If the classifier selected a link-sharing class,
271 * apply its agency-specific classifier.
272 * Repeat this procedure until we hit a leaf node.
273 */
274 head = cl;
275 }
276
277 fallback:
278 cl = head;
279
280 /*
281 * Step 4. No success...
282 */
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
286 return head;
287
288 return cl;
289 }
290
291 /*
292 A packet has just been enqueued on an empty class.
293 cbq_activate_class adds the class to the tail of the active class list
294 of its priority band.
295 */
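
/* Note on the data structure (derived from the code below): q->active[prio]
   points at the TAIL of a circular, singly linked list threaded through
   next_alive, so the head of the round-robin rotation is
   q->active[prio]->next_alive. */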
296
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
298 {
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
302
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
305
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
309 } else {
310 cl->next_alive = cl;
311 q->activemask |= (1<<prio);
312 }
313 }
314
315 /*
316 Unlink the class from the active chain.
317 Note that the same unlinking is done directly in cbq_dequeue*
318 during the round-robin pass.
319 */
320
321 static void cbq_deactivate_class(struct cbq_class *this)
322 {
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
327
328 do {
329 cl = cl_prev->next_alive;
330 if (cl == this) {
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
333
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
339 return;
340 }
341 }
342 return;
343 }
344 } while ((cl_prev = cl) != q->active[prio]);
345 }
346
347 static void
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
349 {
350 int toplevel = q->toplevel;
351
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
353 psched_time_t now;
354 psched_tdiff_t incr;
355
356 now = psched_get_time();
357 incr = now - q->now_rt;
358 now = q->now + incr;
359
360 do {
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
363 return;
364 }
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
366 }
367 }
368
369 static int
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
371 {
372 struct cbq_sched_data *q = qdisc_priv(sch);
373 int uninitialized_var(ret);
374 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
375
376 #ifdef CONFIG_NET_CLS_ACT
377 q->rx_class = cl;
378 #endif
379 if (cl == NULL) {
380 if (ret & __NET_XMIT_BYPASS)
381 sch->qstats.drops++;
382 kfree_skb(skb);
383 return ret;
384 }
385
386 #ifdef CONFIG_NET_CLS_ACT
387 cl->q->__parent = sch;
388 #endif
389 ret = qdisc_enqueue(skb, cl->q);
390 if (ret == NET_XMIT_SUCCESS) {
391 sch->q.qlen++;
392 sch->bstats.packets++;
393 sch->bstats.bytes += qdisc_pkt_len(skb);
394 cbq_mark_toplevel(q, cl);
395 if (!cl->next_alive)
396 cbq_activate_class(cl);
397 return ret;
398 }
399
400 if (net_xmit_drop_count(ret)) {
401 sch->qstats.drops++;
402 cbq_mark_toplevel(q, cl);
403 cl->qstats.drops++;
404 }
405 return ret;
406 }
407
408 /* Overlimit actions */
409
410 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
411
412 static void cbq_ovl_classic(struct cbq_class *cl)
413 {
414 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
415 psched_tdiff_t delay = cl->undertime - q->now;
416
417 if (!cl->delayed) {
418 delay += cl->offtime;
419
420 /*
421 The class goes to sleep, so that it will have no
422 chance to work on avgidle. Let's forgive it 8)
423
424 BTW cbq-2.0 is broken in this
425 place; apparently they forgot to shift it by cl->ewma_log.
426 */
427 if (cl->avgidle < 0)
428 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
429 if (cl->avgidle < cl->minidle)
430 cl->avgidle = cl->minidle;
431 if (delay <= 0)
432 delay = 1;
433 cl->undertime = q->now + delay;
434
435 cl->xstats.overactions++;
436 cl->delayed = 1;
437 }
438 if (q->wd_expires == 0 || q->wd_expires > delay)
439 q->wd_expires = delay;
440
441 /* Dirty work! We must schedule wakeups based on
442 real available rate, rather than leaf rate,
443 which may be tiny (even zero).
444 */
445 if (q->toplevel == TC_CBQ_MAXLEVEL) {
446 struct cbq_class *b;
447 psched_tdiff_t base_delay = q->wd_expires;
448
449 for (b = cl->borrow; b; b = b->borrow) {
450 delay = b->undertime - q->now;
451 if (delay < base_delay) {
452 if (delay <= 0)
453 delay = 1;
454 base_delay = delay;
455 }
456 }
457
458 q->wd_expires = base_delay;
459 }
460 }
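
/* Worked example of the classic overlimit delay (illustrative numbers, not
 * from the original text): suppose cl->undertime - q->now = 100 ticks,
 * cl->offtime = 50 ticks, cl->avgidle = -64 and cl->ewma_log = 5. Then
 * delay starts at 100 + 50 = 150, the "forgiveness" term is
 * (-avgidle) - ((-avgidle) >> ewma_log) = 64 - 2 = 62, so the class is
 * put to sleep for 150 - 62 = 88 ticks, i.e. undertime = q->now + 88.
 */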
461
462 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
463 they go overlimit
464 */
465
466 static void cbq_ovl_rclassic(struct cbq_class *cl)
467 {
468 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
469 struct cbq_class *this = cl;
470
471 do {
472 if (cl->level > q->toplevel) {
473 cl = NULL;
474 break;
475 }
476 } while ((cl = cl->borrow) != NULL);
477
478 if (cl == NULL)
479 cl = this;
480 cbq_ovl_classic(cl);
481 }
482
483 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
484
485 static void cbq_ovl_delay(struct cbq_class *cl)
486 {
487 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
488 psched_tdiff_t delay = cl->undertime - q->now;
489
490 if (test_bit(__QDISC_STATE_DEACTIVATED,
491 &qdisc_root_sleeping(cl->qdisc)->state))
492 return;
493
494 if (!cl->delayed) {
495 psched_time_t sched = q->now;
496 ktime_t expires;
497
498 delay += cl->offtime;
499 if (cl->avgidle < 0)
500 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
501 if (cl->avgidle < cl->minidle)
502 cl->avgidle = cl->minidle;
503 cl->undertime = q->now + delay;
504
505 if (delay > 0) {
506 struct hrtimer *ht;
507
508 sched += delay + cl->penalty;
509 cl->penalized = sched;
510 cl->cpriority = TC_CBQ_MAXPRIO;
511 q->pmask |= (1<<TC_CBQ_MAXPRIO);
512
513 expires = ktime_set(0, 0);
514 expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
515 ht = &q->delay_timer.timer;
516 if (hrtimer_try_to_cancel(ht) &&
517 ktime_to_ns(ktime_sub(hrtimer_get_expires(ht),
518 expires)) > 0)
519 hrtimer_set_expires(ht, expires);
520 hrtimer_restart(ht);
521 cl->delayed = 1;
522 cl->xstats.overactions++;
523 return;
524 }
525 delay = 1;
526 }
527 if (q->wd_expires == 0 || q->wd_expires > delay)
528 q->wd_expires = delay;
529 }
530
531 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
532
533 static void cbq_ovl_lowprio(struct cbq_class *cl)
534 {
535 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
536
537 cl->penalized = q->now + cl->penalty;
538
539 if (cl->cpriority != cl->priority2) {
540 cl->cpriority = cl->priority2;
541 q->pmask |= (1<<cl->cpriority);
542 cl->xstats.overactions++;
543 }
544 cbq_ovl_classic(cl);
545 }
546
547 /* TC_CBQ_OVL_DROP: penalize class by dropping */
548
549 static void cbq_ovl_drop(struct cbq_class *cl)
550 {
551 if (cl->q->ops->drop)
552 if (cl->q->ops->drop(cl->q))
553 cl->qdisc->q.qlen--;
554 cl->xstats.overactions++;
555 cbq_ovl_classic(cl);
556 }
557
558 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
559 psched_time_t now)
560 {
561 struct cbq_class *cl;
562 struct cbq_class *cl_prev = q->active[prio];
563 psched_time_t sched = now;
564
565 if (cl_prev == NULL)
566 return 0;
567
568 do {
569 cl = cl_prev->next_alive;
570 if (now - cl->penalized > 0) {
571 cl_prev->next_alive = cl->next_alive;
572 cl->next_alive = NULL;
573 cl->cpriority = cl->priority;
574 cl->delayed = 0;
575 cbq_activate_class(cl);
576
577 if (cl == q->active[prio]) {
578 q->active[prio] = cl_prev;
579 if (cl == q->active[prio]) {
580 q->active[prio] = NULL;
581 return 0;
582 }
583 }
584
585 cl = cl_prev->next_alive;
586 } else if (sched - cl->penalized > 0)
587 sched = cl->penalized;
588 } while ((cl_prev = cl) != q->active[prio]);
589
590 return sched - now;
591 }
592
593 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
594 {
595 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
596 delay_timer.timer);
597 struct Qdisc *sch = q->watchdog.qdisc;
598 psched_time_t now;
599 psched_tdiff_t delay = 0;
600 unsigned pmask;
601
602 now = psched_get_time();
603
604 pmask = q->pmask;
605 q->pmask = 0;
606
607 while (pmask) {
608 int prio = ffz(~pmask);
609 psched_tdiff_t tmp;
610
611 pmask &= ~(1<<prio);
612
613 tmp = cbq_undelay_prio(q, prio, now);
614 if (tmp > 0) {
615 q->pmask |= 1<<prio;
616 if (tmp < delay || delay == 0)
617 delay = tmp;
618 }
619 }
620
621 if (delay) {
622 ktime_t time;
623
624 time = ktime_set(0, 0);
625 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
626 tasklet_hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
627 }
628
629 sch->flags &= ~TCQ_F_THROTTLED;
630 __netif_schedule(qdisc_root(sch));
631 return HRTIMER_NORESTART;
632 }
633
634 #ifdef CONFIG_NET_CLS_ACT
635 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
636 {
637 struct Qdisc *sch = child->__parent;
638 struct cbq_sched_data *q = qdisc_priv(sch);
639 struct cbq_class *cl = q->rx_class;
640
641 q->rx_class = NULL;
642
643 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
644 int ret;
645
646 cbq_mark_toplevel(q, cl);
647
648 q->rx_class = cl;
649 cl->q->__parent = sch;
650
651 ret = qdisc_enqueue(skb, cl->q);
652 if (ret == NET_XMIT_SUCCESS) {
653 sch->q.qlen++;
654 sch->bstats.packets++;
655 sch->bstats.bytes += qdisc_pkt_len(skb);
656 if (!cl->next_alive)
657 cbq_activate_class(cl);
658 return 0;
659 }
660 if (net_xmit_drop_count(ret))
661 sch->qstats.drops++;
662 return 0;
663 }
664
665 sch->qstats.drops++;
666 return -1;
667 }
668 #endif
669
670 /*
671 This is a mission-critical procedure.
672
673 We "regenerate" the toplevel cutoff if the transmitting class
674 has backlog and is not regulated. This is not part of the
675 original CBQ description, but it looks more reasonable.
676 It may well be wrong; this question needs further investigation.
677 */
678
679 static __inline__ void
680 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
681 struct cbq_class *borrowed)
682 {
683 if (cl && q->toplevel >= borrowed->level) {
684 if (cl->q->q.qlen > 1) {
685 do {
686 if (borrowed->undertime == PSCHED_PASTPERFECT) {
687 q->toplevel = borrowed->level;
688 return;
689 }
690 } while ((borrowed=borrowed->borrow) != NULL);
691 }
692 #if 0
693 /* It is not necessary now. Uncommenting it
694 will save CPU cycles, but decrease fairness.
695 */
696 q->toplevel = TC_CBQ_MAXLEVEL;
697 #endif
698 }
699 }
700
701 static void
702 cbq_update(struct cbq_sched_data *q)
703 {
704 struct cbq_class *this = q->tx_class;
705 struct cbq_class *cl = this;
706 int len = q->tx_len;
707
708 q->tx_class = NULL;
709
710 for ( ; cl; cl = cl->share) {
711 long avgidle = cl->avgidle;
712 long idle;
713
714 cl->bstats.packets++;
715 cl->bstats.bytes += len;
716
717 /*
718 (now - last) is the total time between packet right edges.
719 (last_pktlen/rate) is the "virtual" busy time, so that
720
721 idle = (now - last) - last_pktlen/rate
722 */
723
724 idle = q->now - cl->last;
725 if ((unsigned long)idle > 128*1024*1024) {
726 avgidle = cl->maxidle;
727 } else {
728 idle -= L2T(cl, len);
729
730 /* true_avgidle := (1-W)*true_avgidle + W*idle,
731 where W=2^{-ewma_log}. But cl->avgidle is scaled:
732 cl->avgidle == true_avgidle/W,
733 hence:
734 */
735 avgidle += idle - (avgidle>>cl->ewma_log);
736 }
737
738 if (avgidle <= 0) {
739 /* Overlimit or at-limit */
740
741 if (avgidle < cl->minidle)
742 avgidle = cl->minidle;
743
744 cl->avgidle = avgidle;
745
746 /* Calculate the expected time when this class
747 will be allowed to send again.
748 That will occur when:
749 (1-W)*true_avgidle + W*delay = 0, i.e.
750 idle = (1/W - 1)*(-true_avgidle)
751 or
752 idle = (1 - W)*(-cl->avgidle);
753 */
754 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
755
756 /*
757 That is not all.
758 To maintain the rate allocated to the class,
759 we add to undertime the virtual clock time
760 needed to complete the transmitted packet.
761 (len/phys_bandwidth has already elapsed by
762 the time cbq_update runs.)
763 */
764
765 idle -= L2T(&q->link, len);
766 idle += L2T(cl, len);
767
768 cl->undertime = q->now + idle;
769 } else {
770 /* Underlimit */
771
772 cl->undertime = PSCHED_PASTPERFECT;
773 if (avgidle > cl->maxidle)
774 cl->avgidle = cl->maxidle;
775 else
776 cl->avgidle = avgidle;
777 }
778 cl->last = q->now;
779 }
780
781 cbq_update_toplevel(q, this, q->tx_borrowed);
782 }
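
/* Worked example of the scaled EWMA above (illustrative numbers, not from
 * the original text): take ewma_log = 5, i.e. W = 1/32. Starting from
 * avgidle = 0, a measured idle of 640 ticks gives
 *
 *	avgidle += 640 - (0 >> 5) = 640,	i.e. true_avgidle = 640/32 = 20.
 *
 * Had idle been -640 instead (the class over-used its share), avgidle would
 * become -640 and the undertime offset computed above would be
 * (-avgidle) - ((-avgidle) >> ewma_log) = 640 - 20 = 620 ticks,
 * adjusted by L2T(cl, len) - L2T(&q->link, len) before being added to q->now.
 */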
783
784 static __inline__ struct cbq_class *
785 cbq_under_limit(struct cbq_class *cl)
786 {
787 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
788 struct cbq_class *this_cl = cl;
789
790 if (cl->tparent == NULL)
791 return cl;
792
793 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
794 cl->delayed = 0;
795 return cl;
796 }
797
798 do {
799 /* This is a very suspicious place. Currently the overlimit
800 action is generated for non-bounded classes
801 only if the link is completely congested.
802 Though this agrees with the ancestor-only paradigm,
803 it looks very stupid. In particular,
804 it means that this chunk of code will either
805 never be called or will result in strong amplification
806 of burstiness. Dangerous, silly, and yet
807 no other solution exists.
808 */
809 if ((cl = cl->borrow) == NULL) {
810 this_cl->qstats.overlimits++;
811 this_cl->overlimit(this_cl);
812 return NULL;
813 }
814 if (cl->level > q->toplevel)
815 return NULL;
816 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
817
818 cl->delayed = 0;
819 return cl;
820 }
821
822 static __inline__ struct sk_buff *
823 cbq_dequeue_prio(struct Qdisc *sch, int prio)
824 {
825 struct cbq_sched_data *q = qdisc_priv(sch);
826 struct cbq_class *cl_tail, *cl_prev, *cl;
827 struct sk_buff *skb;
828 int deficit;
829
830 cl_tail = cl_prev = q->active[prio];
831 cl = cl_prev->next_alive;
832
833 do {
834 deficit = 0;
835
836 /* Start round */
837 do {
838 struct cbq_class *borrow = cl;
839
840 if (cl->q->q.qlen &&
841 (borrow = cbq_under_limit(cl)) == NULL)
842 goto skip_class;
843
844 if (cl->deficit <= 0) {
845 /* Class exhausted its allotment per
846 this round. Switch to the next one.
847 */
848 deficit = 1;
849 cl->deficit += cl->quantum;
850 goto next_class;
851 }
852
853 skb = cl->q->dequeue(cl->q);
854
855 /* The class did not give us any skb :-(
856 This can occur even if cl->q->q.qlen != 0,
857 e.g. if cl->q == "tbf".
858 */
859 if (skb == NULL)
860 goto skip_class;
861
862 cl->deficit -= qdisc_pkt_len(skb);
863 q->tx_class = cl;
864 q->tx_borrowed = borrow;
865 if (borrow != cl) {
866 #ifndef CBQ_XSTATS_BORROWS_BYTES
867 borrow->xstats.borrows++;
868 cl->xstats.borrows++;
869 #else
870 borrow->xstats.borrows += qdisc_pkt_len(skb);
871 cl->xstats.borrows += qdisc_pkt_len(skb);
872 #endif
873 }
874 q->tx_len = qdisc_pkt_len(skb);
875
876 if (cl->deficit <= 0) {
877 q->active[prio] = cl;
878 cl = cl->next_alive;
879 cl->deficit += cl->quantum;
880 }
881 return skb;
882
883 skip_class:
884 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
885 /* Class is empty or penalized.
886 Unlink it from active chain.
887 */
888 cl_prev->next_alive = cl->next_alive;
889 cl->next_alive = NULL;
890
891 /* Did cl_tail point to it? */
892 if (cl == cl_tail) {
893 /* Repair it! */
894 cl_tail = cl_prev;
895
896 /* Was it the last class in this band? */
897 if (cl == cl_tail) {
898 /* Kill the band! */
899 q->active[prio] = NULL;
900 q->activemask &= ~(1<<prio);
901 if (cl->q->q.qlen)
902 cbq_activate_class(cl);
903 return NULL;
904 }
905
906 q->active[prio] = cl_tail;
907 }
908 if (cl->q->q.qlen)
909 cbq_activate_class(cl);
910
911 cl = cl_prev;
912 }
913
914 next_class:
915 cl_prev = cl;
916 cl = cl->next_alive;
917 } while (cl_prev != cl_tail);
918 } while (deficit);
919
920 q->active[prio] = cl_prev;
921
922 return NULL;
923 }
924
925 static __inline__ struct sk_buff *
926 cbq_dequeue_1(struct Qdisc *sch)
927 {
928 struct cbq_sched_data *q = qdisc_priv(sch);
929 struct sk_buff *skb;
930 unsigned activemask;
931
932 activemask = q->activemask&0xFF;
933 while (activemask) {
934 int prio = ffz(~activemask);
935 activemask &= ~(1<<prio);
936 skb = cbq_dequeue_prio(sch, prio);
937 if (skb)
938 return skb;
939 }
940 return NULL;
941 }
942
943 static struct sk_buff *
944 cbq_dequeue(struct Qdisc *sch)
945 {
946 struct sk_buff *skb;
947 struct cbq_sched_data *q = qdisc_priv(sch);
948 psched_time_t now;
949 psched_tdiff_t incr;
950
951 now = psched_get_time();
952 incr = now - q->now_rt;
953
954 if (q->tx_class) {
955 psched_tdiff_t incr2;
956 /* Time integrator. We calculate the EOS time
957 by adding the expected packet transmission time.
958 If real time is greater, we warp the artificial clock,
959 so that:
960
961 cbq_time = max(real_time, work);
962 */
963 incr2 = L2T(&q->link, q->tx_len);
964 q->now += incr2;
965 cbq_update(q);
966 if ((incr -= incr2) < 0)
967 incr = 0;
968 }
969 q->now += incr;
970 q->now_rt = now;
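	/* Illustrative numbers (not from the original text): if the last
	 * packet was 1514 bytes on a 1 Mbit/s link, incr2 = L2T(link, 1514)
	 * is roughly 12 ms worth of ticks. If 15 ms of real time elapsed,
	 * q->now advances 12 + 3 = 15 ms and tracks the wall clock; if only
	 * 10 ms elapsed, q->now still advances the full 12 ms and runs
	 * ahead, which is exactly cbq_time = max(real_time, work).
	 */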
971
972 for (;;) {
973 q->wd_expires = 0;
974
975 skb = cbq_dequeue_1(sch);
976 if (skb) {
977 sch->q.qlen--;
978 sch->flags &= ~TCQ_F_THROTTLED;
979 return skb;
980 }
981
982 /* All the classes are overlimit.
983
984 This is possible if:
985
986 1. The scheduler is empty.
987 2. The toplevel cutoff inhibited borrowing.
988 3. The root class is overlimit.
989
990 Reset the 2nd and 3rd conditions and retry.
991
992 Note that NS and cbq-2.0 are buggy here: peeking
993 at an arbitrary class is appropriate for ancestor-only
994 sharing, but not for the toplevel algorithm.
995
996 Our version is better but slower, because it requires
997 two passes; that is unavoidable with top-level sharing.
998 */
999
1000 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1001 q->link.undertime == PSCHED_PASTPERFECT)
1002 break;
1003
1004 q->toplevel = TC_CBQ_MAXLEVEL;
1005 q->link.undertime = PSCHED_PASTPERFECT;
1006 }
1007
1008 /* No packets in scheduler or nobody wants to give them to us :-(
1009 Sigh... start watchdog timer in the last case. */
1010
1011 if (sch->q.qlen) {
1012 sch->qstats.overlimits++;
1013 if (q->wd_expires)
1014 qdisc_watchdog_schedule(&q->watchdog,
1015 now + q->wd_expires);
1016 }
1017 return NULL;
1018 }
1019
1020 /* CBQ class maintenance routines */
1021
1022 static void cbq_adjust_levels(struct cbq_class *this)
1023 {
1024 if (this == NULL)
1025 return;
1026
1027 do {
1028 int level = 0;
1029 struct cbq_class *cl;
1030
1031 if ((cl = this->children) != NULL) {
1032 do {
1033 if (cl->level > level)
1034 level = cl->level;
1035 } while ((cl = cl->sibling) != this->children);
1036 }
1037 this->level = level+1;
1038 } while ((this = this->tparent) != NULL);
1039 }
1040
1041 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1042 {
1043 struct cbq_class *cl;
1044 struct hlist_node *n;
1045 unsigned int h;
1046
1047 if (q->quanta[prio] == 0)
1048 return;
1049
1050 for (h = 0; h < q->clhash.hashsize; h++) {
1051 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1052 /* BUGGGG... Beware! This expression suffers from
1053 arithmetic overflows!
1054 */
1055 if (cl->priority == prio) {
1056 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1057 q->quanta[prio];
1058 }
1059 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1060 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1061 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1062 }
1063 }
1064 }
1065 }
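
/* Worked example of the normalization above (illustrative numbers, not from
 * the original text): two classes share a priority, allot = 1514 for both,
 * weights 125000 and 375000 (weight defaults to the rate in bytes/s, i.e.
 * 1 Mbit/s and 3 Mbit/s), so quanta[prio] = 500000 and nclasses[prio] = 2.
 * Then
 *
 *	quantum_1 = 125000 * 1514 * 2 / 500000 = 757
 *	quantum_2 = 375000 * 1514 * 2 / 500000 = 2271
 *
 * i.e. the allot is split in proportion to the weights. Note how easily the
 * intermediate product overflows a 32-bit long: at 10 Mbit/s the weight is
 * 1250000 and 1250000 * 1514 * 2 is already above 2^31, which is exactly
 * the overflow the warning in the code refers to.
 */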
1066
1067 static void cbq_sync_defmap(struct cbq_class *cl)
1068 {
1069 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1070 struct cbq_class *split = cl->split;
1071 unsigned h;
1072 int i;
1073
1074 if (split == NULL)
1075 return;
1076
1077 for (i=0; i<=TC_PRIO_MAX; i++) {
1078 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1079 split->defaults[i] = NULL;
1080 }
1081
1082 for (i=0; i<=TC_PRIO_MAX; i++) {
1083 int level = split->level;
1084
1085 if (split->defaults[i])
1086 continue;
1087
1088 for (h = 0; h < q->clhash.hashsize; h++) {
1089 struct hlist_node *n;
1090 struct cbq_class *c;
1091
1092 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1093 common.hnode) {
1094 if (c->split == split && c->level < level &&
1095 c->defmap&(1<<i)) {
1096 split->defaults[i] = c;
1097 level = c->level;
1098 }
1099 }
1100 }
1101 }
1102 }
1103
1104 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1105 {
1106 struct cbq_class *split = NULL;
1107
1108 if (splitid == 0) {
1109 if ((split = cl->split) == NULL)
1110 return;
1111 splitid = split->common.classid;
1112 }
1113
1114 if (split == NULL || split->common.classid != splitid) {
1115 for (split = cl->tparent; split; split = split->tparent)
1116 if (split->common.classid == splitid)
1117 break;
1118 }
1119
1120 if (split == NULL)
1121 return;
1122
1123 if (cl->split != split) {
1124 cl->defmap = 0;
1125 cbq_sync_defmap(cl);
1126 cl->split = split;
1127 cl->defmap = def&mask;
1128 } else
1129 cl->defmap = (cl->defmap&~mask)|(def&mask);
1130
1131 cbq_sync_defmap(cl);
1132 }
1133
1134 static void cbq_unlink_class(struct cbq_class *this)
1135 {
1136 struct cbq_class *cl, **clp;
1137 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1138
1139 qdisc_class_hash_remove(&q->clhash, &this->common);
1140
1141 if (this->tparent) {
1142 clp=&this->sibling;
1143 cl = *clp;
1144 do {
1145 if (cl == this) {
1146 *clp = cl->sibling;
1147 break;
1148 }
1149 clp = &cl->sibling;
1150 } while ((cl = *clp) != this->sibling);
1151
1152 if (this->tparent->children == this) {
1153 this->tparent->children = this->sibling;
1154 if (this->sibling == this)
1155 this->tparent->children = NULL;
1156 }
1157 } else {
1158 WARN_ON(this->sibling != this);
1159 }
1160 }
1161
1162 static void cbq_link_class(struct cbq_class *this)
1163 {
1164 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1165 struct cbq_class *parent = this->tparent;
1166
1167 this->sibling = this;
1168 qdisc_class_hash_insert(&q->clhash, &this->common);
1169
1170 if (parent == NULL)
1171 return;
1172
1173 if (parent->children == NULL) {
1174 parent->children = this;
1175 } else {
1176 this->sibling = parent->children->sibling;
1177 parent->children->sibling = this;
1178 }
1179 }
1180
1181 static unsigned int cbq_drop(struct Qdisc* sch)
1182 {
1183 struct cbq_sched_data *q = qdisc_priv(sch);
1184 struct cbq_class *cl, *cl_head;
1185 int prio;
1186 unsigned int len;
1187
1188 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1189 if ((cl_head = q->active[prio]) == NULL)
1190 continue;
1191
1192 cl = cl_head;
1193 do {
1194 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1195 sch->q.qlen--;
1196 if (!cl->q->q.qlen)
1197 cbq_deactivate_class(cl);
1198 return len;
1199 }
1200 } while ((cl = cl->next_alive) != cl_head);
1201 }
1202 return 0;
1203 }
1204
1205 static void
1206 cbq_reset(struct Qdisc* sch)
1207 {
1208 struct cbq_sched_data *q = qdisc_priv(sch);
1209 struct cbq_class *cl;
1210 struct hlist_node *n;
1211 int prio;
1212 unsigned h;
1213
1214 q->activemask = 0;
1215 q->pmask = 0;
1216 q->tx_class = NULL;
1217 q->tx_borrowed = NULL;
1218 qdisc_watchdog_cancel(&q->watchdog);
1219 tasklet_hrtimer_cancel(&q->delay_timer);
1220 q->toplevel = TC_CBQ_MAXLEVEL;
1221 q->now = psched_get_time();
1222 q->now_rt = q->now;
1223
1224 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1225 q->active[prio] = NULL;
1226
1227 for (h = 0; h < q->clhash.hashsize; h++) {
1228 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1229 qdisc_reset(cl->q);
1230
1231 cl->next_alive = NULL;
1232 cl->undertime = PSCHED_PASTPERFECT;
1233 cl->avgidle = cl->maxidle;
1234 cl->deficit = cl->quantum;
1235 cl->cpriority = cl->priority;
1236 }
1237 }
1238 sch->q.qlen = 0;
1239 }
1240
1241
1242 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1243 {
1244 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1245 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1246 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1247 }
1248 if (lss->change&TCF_CBQ_LSS_EWMA)
1249 cl->ewma_log = lss->ewma_log;
1250 if (lss->change&TCF_CBQ_LSS_AVPKT)
1251 cl->avpkt = lss->avpkt;
1252 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1253 cl->minidle = -(long)lss->minidle;
1254 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1255 cl->maxidle = lss->maxidle;
1256 cl->avgidle = lss->maxidle;
1257 }
1258 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1259 cl->offtime = lss->offtime;
1260 return 0;
1261 }
1262
1263 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1264 {
1265 q->nclasses[cl->priority]--;
1266 q->quanta[cl->priority] -= cl->weight;
1267 cbq_normalize_quanta(q, cl->priority);
1268 }
1269
1270 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1271 {
1272 q->nclasses[cl->priority]++;
1273 q->quanta[cl->priority] += cl->weight;
1274 cbq_normalize_quanta(q, cl->priority);
1275 }
1276
1277 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1278 {
1279 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1280
1281 if (wrr->allot)
1282 cl->allot = wrr->allot;
1283 if (wrr->weight)
1284 cl->weight = wrr->weight;
1285 if (wrr->priority) {
1286 cl->priority = wrr->priority-1;
1287 cl->cpriority = cl->priority;
1288 if (cl->priority >= cl->priority2)
1289 cl->priority2 = TC_CBQ_MAXPRIO-1;
1290 }
1291
1292 cbq_addprio(q, cl);
1293 return 0;
1294 }
1295
1296 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1297 {
1298 switch (ovl->strategy) {
1299 case TC_CBQ_OVL_CLASSIC:
1300 cl->overlimit = cbq_ovl_classic;
1301 break;
1302 case TC_CBQ_OVL_DELAY:
1303 cl->overlimit = cbq_ovl_delay;
1304 break;
1305 case TC_CBQ_OVL_LOWPRIO:
1306 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1307 ovl->priority2-1 <= cl->priority)
1308 return -EINVAL;
1309 cl->priority2 = ovl->priority2-1;
1310 cl->overlimit = cbq_ovl_lowprio;
1311 break;
1312 case TC_CBQ_OVL_DROP:
1313 cl->overlimit = cbq_ovl_drop;
1314 break;
1315 case TC_CBQ_OVL_RCLASSIC:
1316 cl->overlimit = cbq_ovl_rclassic;
1317 break;
1318 default:
1319 return -EINVAL;
1320 }
1321 cl->penalty = ovl->penalty;
1322 return 0;
1323 }
1324
1325 #ifdef CONFIG_NET_CLS_ACT
1326 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1327 {
1328 cl->police = p->police;
1329
1330 if (cl->q->handle) {
1331 if (p->police == TC_POLICE_RECLASSIFY)
1332 cl->q->reshape_fail = cbq_reshape_fail;
1333 else
1334 cl->q->reshape_fail = NULL;
1335 }
1336 return 0;
1337 }
1338 #endif
1339
1340 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1341 {
1342 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1343 return 0;
1344 }
1345
1346 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1347 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1348 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1349 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1350 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1351 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1352 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1353 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1354 };
1355
1356 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1357 {
1358 struct cbq_sched_data *q = qdisc_priv(sch);
1359 struct nlattr *tb[TCA_CBQ_MAX + 1];
1360 struct tc_ratespec *r;
1361 int err;
1362
1363 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1364 if (err < 0)
1365 return err;
1366
1367 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1368 return -EINVAL;
1369
1370 r = nla_data(tb[TCA_CBQ_RATE]);
1371
1372 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1373 return -EINVAL;
1374
1375 err = qdisc_class_hash_init(&q->clhash);
1376 if (err < 0)
1377 goto put_rtab;
1378
1379 q->link.refcnt = 1;
1380 q->link.sibling = &q->link;
1381 q->link.common.classid = sch->handle;
1382 q->link.qdisc = sch;
1383 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1384 &pfifo_qdisc_ops,
1385 sch->handle)))
1386 q->link.q = &noop_qdisc;
1387
1388 q->link.priority = TC_CBQ_MAXPRIO-1;
1389 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1390 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1391 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1392 q->link.overlimit = cbq_ovl_classic;
1393 q->link.allot = psched_mtu(qdisc_dev(sch));
1394 q->link.quantum = q->link.allot;
1395 q->link.weight = q->link.R_tab->rate.rate;
1396
1397 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1398 q->link.avpkt = q->link.allot/2;
1399 q->link.minidle = -0x7FFFFFFF;
1400
1401 qdisc_watchdog_init(&q->watchdog, sch);
1402 tasklet_hrtimer_init(&q->delay_timer, cbq_undelay,
1403 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1404 q->delay_timer.function = cbq_undelay;
1405 q->toplevel = TC_CBQ_MAXLEVEL;
1406 q->now = psched_get_time();
1407 q->now_rt = q->now;
1408
1409 cbq_link_class(&q->link);
1410
1411 if (tb[TCA_CBQ_LSSOPT])
1412 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1413
1414 cbq_addprio(q, &q->link);
1415 return 0;
1416
1417 put_rtab:
1418 qdisc_put_rtab(q->link.R_tab);
1419 return err;
1420 }
1421
1422 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1423 {
1424 unsigned char *b = skb_tail_pointer(skb);
1425
1426 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1427 return skb->len;
1428
1429 nla_put_failure:
1430 nlmsg_trim(skb, b);
1431 return -1;
1432 }
1433
1434 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1435 {
1436 unsigned char *b = skb_tail_pointer(skb);
1437 struct tc_cbq_lssopt opt;
1438
1439 opt.flags = 0;
1440 if (cl->borrow == NULL)
1441 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1442 if (cl->share == NULL)
1443 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1444 opt.ewma_log = cl->ewma_log;
1445 opt.level = cl->level;
1446 opt.avpkt = cl->avpkt;
1447 opt.maxidle = cl->maxidle;
1448 opt.minidle = (u32)(-cl->minidle);
1449 opt.offtime = cl->offtime;
1450 opt.change = ~0;
1451 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1452 return skb->len;
1453
1454 nla_put_failure:
1455 nlmsg_trim(skb, b);
1456 return -1;
1457 }
1458
1459 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1460 {
1461 unsigned char *b = skb_tail_pointer(skb);
1462 struct tc_cbq_wrropt opt;
1463
1464 opt.flags = 0;
1465 opt.allot = cl->allot;
1466 opt.priority = cl->priority+1;
1467 opt.cpriority = cl->cpriority+1;
1468 opt.weight = cl->weight;
1469 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1470 return skb->len;
1471
1472 nla_put_failure:
1473 nlmsg_trim(skb, b);
1474 return -1;
1475 }
1476
1477 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1478 {
1479 unsigned char *b = skb_tail_pointer(skb);
1480 struct tc_cbq_ovl opt;
1481
1482 opt.strategy = cl->ovl_strategy;
1483 opt.priority2 = cl->priority2+1;
1484 opt.pad = 0;
1485 opt.penalty = cl->penalty;
1486 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1487 return skb->len;
1488
1489 nla_put_failure:
1490 nlmsg_trim(skb, b);
1491 return -1;
1492 }
1493
1494 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1495 {
1496 unsigned char *b = skb_tail_pointer(skb);
1497 struct tc_cbq_fopt opt;
1498
1499 if (cl->split || cl->defmap) {
1500 opt.split = cl->split ? cl->split->common.classid : 0;
1501 opt.defmap = cl->defmap;
1502 opt.defchange = ~0;
1503 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1504 }
1505 return skb->len;
1506
1507 nla_put_failure:
1508 nlmsg_trim(skb, b);
1509 return -1;
1510 }
1511
1512 #ifdef CONFIG_NET_CLS_ACT
1513 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1514 {
1515 unsigned char *b = skb_tail_pointer(skb);
1516 struct tc_cbq_police opt;
1517
1518 if (cl->police) {
1519 opt.police = cl->police;
1520 opt.__res1 = 0;
1521 opt.__res2 = 0;
1522 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1523 }
1524 return skb->len;
1525
1526 nla_put_failure:
1527 nlmsg_trim(skb, b);
1528 return -1;
1529 }
1530 #endif
1531
1532 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1533 {
1534 if (cbq_dump_lss(skb, cl) < 0 ||
1535 cbq_dump_rate(skb, cl) < 0 ||
1536 cbq_dump_wrr(skb, cl) < 0 ||
1537 cbq_dump_ovl(skb, cl) < 0 ||
1538 #ifdef CONFIG_NET_CLS_ACT
1539 cbq_dump_police(skb, cl) < 0 ||
1540 #endif
1541 cbq_dump_fopt(skb, cl) < 0)
1542 return -1;
1543 return 0;
1544 }
1545
1546 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1547 {
1548 struct cbq_sched_data *q = qdisc_priv(sch);
1549 struct nlattr *nest;
1550
1551 nest = nla_nest_start(skb, TCA_OPTIONS);
1552 if (nest == NULL)
1553 goto nla_put_failure;
1554 if (cbq_dump_attr(skb, &q->link) < 0)
1555 goto nla_put_failure;
1556 nla_nest_end(skb, nest);
1557 return skb->len;
1558
1559 nla_put_failure:
1560 nla_nest_cancel(skb, nest);
1561 return -1;
1562 }
1563
1564 static int
1565 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1566 {
1567 struct cbq_sched_data *q = qdisc_priv(sch);
1568
1569 q->link.xstats.avgidle = q->link.avgidle;
1570 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1571 }
1572
1573 static int
1574 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1575 struct sk_buff *skb, struct tcmsg *tcm)
1576 {
1577 struct cbq_class *cl = (struct cbq_class*)arg;
1578 struct nlattr *nest;
1579
1580 if (cl->tparent)
1581 tcm->tcm_parent = cl->tparent->common.classid;
1582 else
1583 tcm->tcm_parent = TC_H_ROOT;
1584 tcm->tcm_handle = cl->common.classid;
1585 tcm->tcm_info = cl->q->handle;
1586
1587 nest = nla_nest_start(skb, TCA_OPTIONS);
1588 if (nest == NULL)
1589 goto nla_put_failure;
1590 if (cbq_dump_attr(skb, cl) < 0)
1591 goto nla_put_failure;
1592 nla_nest_end(skb, nest);
1593 return skb->len;
1594
1595 nla_put_failure:
1596 nla_nest_cancel(skb, nest);
1597 return -1;
1598 }
1599
1600 static int
1601 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1602 struct gnet_dump *d)
1603 {
1604 struct cbq_sched_data *q = qdisc_priv(sch);
1605 struct cbq_class *cl = (struct cbq_class*)arg;
1606
1607 cl->qstats.qlen = cl->q->q.qlen;
1608 cl->xstats.avgidle = cl->avgidle;
1609 cl->xstats.undertime = 0;
1610
1611 if (cl->undertime != PSCHED_PASTPERFECT)
1612 cl->xstats.undertime = cl->undertime - q->now;
1613
1614 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1615 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1616 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1617 return -1;
1618
1619 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1620 }
1621
1622 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1623 struct Qdisc **old)
1624 {
1625 struct cbq_class *cl = (struct cbq_class*)arg;
1626
1627 if (cl) {
1628 if (new == NULL) {
1629 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1630 &pfifo_qdisc_ops,
1631 cl->common.classid);
1632 if (new == NULL)
1633 return -ENOBUFS;
1634 } else {
1635 #ifdef CONFIG_NET_CLS_ACT
1636 if (cl->police == TC_POLICE_RECLASSIFY)
1637 new->reshape_fail = cbq_reshape_fail;
1638 #endif
1639 }
1640 sch_tree_lock(sch);
1641 *old = cl->q;
1642 cl->q = new;
1643 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1644 qdisc_reset(*old);
1645 sch_tree_unlock(sch);
1646
1647 return 0;
1648 }
1649 return -ENOENT;
1650 }
1651
1652 static struct Qdisc *
1653 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1654 {
1655 struct cbq_class *cl = (struct cbq_class*)arg;
1656
1657 return cl ? cl->q : NULL;
1658 }
1659
1660 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1661 {
1662 struct cbq_class *cl = (struct cbq_class *)arg;
1663
1664 if (cl->q->q.qlen == 0)
1665 cbq_deactivate_class(cl);
1666 }
1667
1668 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1669 {
1670 struct cbq_sched_data *q = qdisc_priv(sch);
1671 struct cbq_class *cl = cbq_class_lookup(q, classid);
1672
1673 if (cl) {
1674 cl->refcnt++;
1675 return (unsigned long)cl;
1676 }
1677 return 0;
1678 }
1679
1680 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1681 {
1682 struct cbq_sched_data *q = qdisc_priv(sch);
1683
1684 WARN_ON(cl->filters);
1685
1686 tcf_destroy_chain(&cl->filter_list);
1687 qdisc_destroy(cl->q);
1688 qdisc_put_rtab(cl->R_tab);
1689 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1690 if (cl != &q->link)
1691 kfree(cl);
1692 }
1693
1694 static void
1695 cbq_destroy(struct Qdisc* sch)
1696 {
1697 struct cbq_sched_data *q = qdisc_priv(sch);
1698 struct hlist_node *n, *next;
1699 struct cbq_class *cl;
1700 unsigned h;
1701
1702 #ifdef CONFIG_NET_CLS_ACT
1703 q->rx_class = NULL;
1704 #endif
1705 /*
1706 * Filters must be destroyed first because we don't destroy the
1707 * classes from root to leaves, which means that filters can still
1708 * be bound to classes which have been destroyed already. --TGR '04
1709 */
1710 for (h = 0; h < q->clhash.hashsize; h++) {
1711 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1712 tcf_destroy_chain(&cl->filter_list);
1713 }
1714 for (h = 0; h < q->clhash.hashsize; h++) {
1715 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1716 common.hnode)
1717 cbq_destroy_class(sch, cl);
1718 }
1719 qdisc_class_hash_destroy(&q->clhash);
1720 }
1721
1722 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1723 {
1724 struct cbq_class *cl = (struct cbq_class*)arg;
1725
1726 if (--cl->refcnt == 0) {
1727 #ifdef CONFIG_NET_CLS_ACT
1728 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1729 struct cbq_sched_data *q = qdisc_priv(sch);
1730
1731 spin_lock_bh(root_lock);
1732 if (q->rx_class == cl)
1733 q->rx_class = NULL;
1734 spin_unlock_bh(root_lock);
1735 #endif
1736
1737 cbq_destroy_class(sch, cl);
1738 }
1739 }
1740
1741 static int
1742 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1743 unsigned long *arg)
1744 {
1745 int err;
1746 struct cbq_sched_data *q = qdisc_priv(sch);
1747 struct cbq_class *cl = (struct cbq_class*)*arg;
1748 struct nlattr *opt = tca[TCA_OPTIONS];
1749 struct nlattr *tb[TCA_CBQ_MAX + 1];
1750 struct cbq_class *parent;
1751 struct qdisc_rate_table *rtab = NULL;
1752
1753 if (opt == NULL)
1754 return -EINVAL;
1755
1756 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1757 if (err < 0)
1758 return err;
1759
1760 if (cl) {
1761 /* Check parent */
1762 if (parentid) {
1763 if (cl->tparent &&
1764 cl->tparent->common.classid != parentid)
1765 return -EINVAL;
1766 if (!cl->tparent && parentid != TC_H_ROOT)
1767 return -EINVAL;
1768 }
1769
1770 if (tb[TCA_CBQ_RATE]) {
1771 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1772 tb[TCA_CBQ_RTAB]);
1773 if (rtab == NULL)
1774 return -EINVAL;
1775 }
1776
1777 if (tca[TCA_RATE]) {
1778 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1779 qdisc_root_sleeping_lock(sch),
1780 tca[TCA_RATE]);
1781 if (err) {
1782 if (rtab)
1783 qdisc_put_rtab(rtab);
1784 return err;
1785 }
1786 }
1787
1788 /* Change class parameters */
1789 sch_tree_lock(sch);
1790
1791 if (cl->next_alive != NULL)
1792 cbq_deactivate_class(cl);
1793
1794 if (rtab) {
1795 qdisc_put_rtab(cl->R_tab);
1796 cl->R_tab = rtab;
1797 }
1798
1799 if (tb[TCA_CBQ_LSSOPT])
1800 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1801
1802 if (tb[TCA_CBQ_WRROPT]) {
1803 cbq_rmprio(q, cl);
1804 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1805 }
1806
1807 if (tb[TCA_CBQ_OVL_STRATEGY])
1808 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1809
1810 #ifdef CONFIG_NET_CLS_ACT
1811 if (tb[TCA_CBQ_POLICE])
1812 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1813 #endif
1814
1815 if (tb[TCA_CBQ_FOPT])
1816 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1817
1818 if (cl->q->q.qlen)
1819 cbq_activate_class(cl);
1820
1821 sch_tree_unlock(sch);
1822
1823 return 0;
1824 }
1825
1826 if (parentid == TC_H_ROOT)
1827 return -EINVAL;
1828
1829 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1830 tb[TCA_CBQ_LSSOPT] == NULL)
1831 return -EINVAL;
1832
1833 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1834 if (rtab == NULL)
1835 return -EINVAL;
1836
1837 if (classid) {
1838 err = -EINVAL;
1839 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1840 goto failure;
1841 } else {
1842 int i;
1843 classid = TC_H_MAKE(sch->handle,0x8000);
1844
1845 for (i=0; i<0x8000; i++) {
1846 if (++q->hgenerator >= 0x8000)
1847 q->hgenerator = 1;
1848 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1849 break;
1850 }
1851 err = -ENOSR;
1852 if (i >= 0x8000)
1853 goto failure;
1854 classid = classid|q->hgenerator;
1855 }
1856
1857 parent = &q->link;
1858 if (parentid) {
1859 parent = cbq_class_lookup(q, parentid);
1860 err = -EINVAL;
1861 if (parent == NULL)
1862 goto failure;
1863 }
1864
1865 err = -ENOBUFS;
1866 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1867 if (cl == NULL)
1868 goto failure;
1869
1870 if (tca[TCA_RATE]) {
1871 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1872 qdisc_root_sleeping_lock(sch),
1873 tca[TCA_RATE]);
1874 if (err) {
1875 kfree(cl);
1876 goto failure;
1877 }
1878 }
1879
1880 cl->R_tab = rtab;
1881 rtab = NULL;
1882 cl->refcnt = 1;
1883 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1884 &pfifo_qdisc_ops, classid)))
1885 cl->q = &noop_qdisc;
1886 cl->common.classid = classid;
1887 cl->tparent = parent;
1888 cl->qdisc = sch;
1889 cl->allot = parent->allot;
1890 cl->quantum = cl->allot;
1891 cl->weight = cl->R_tab->rate.rate;
1892
1893 sch_tree_lock(sch);
1894 cbq_link_class(cl);
1895 cl->borrow = cl->tparent;
1896 if (cl->tparent != &q->link)
1897 cl->share = cl->tparent;
1898 cbq_adjust_levels(parent);
1899 cl->minidle = -0x7FFFFFFF;
1900 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1901 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1902 if (cl->ewma_log==0)
1903 cl->ewma_log = q->link.ewma_log;
1904 if (cl->maxidle==0)
1905 cl->maxidle = q->link.maxidle;
1906 if (cl->avpkt==0)
1907 cl->avpkt = q->link.avpkt;
1908 cl->overlimit = cbq_ovl_classic;
1909 if (tb[TCA_CBQ_OVL_STRATEGY])
1910 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1911 #ifdef CONFIG_NET_CLS_ACT
1912 if (tb[TCA_CBQ_POLICE])
1913 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1914 #endif
1915 if (tb[TCA_CBQ_FOPT])
1916 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1917 sch_tree_unlock(sch);
1918
1919 qdisc_class_hash_grow(sch, &q->clhash);
1920
1921 *arg = (unsigned long)cl;
1922 return 0;
1923
1924 failure:
1925 qdisc_put_rtab(rtab);
1926 return err;
1927 }
1928
1929 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1930 {
1931 struct cbq_sched_data *q = qdisc_priv(sch);
1932 struct cbq_class *cl = (struct cbq_class*)arg;
1933 unsigned int qlen;
1934
1935 if (cl->filters || cl->children || cl == &q->link)
1936 return -EBUSY;
1937
1938 sch_tree_lock(sch);
1939
1940 qlen = cl->q->q.qlen;
1941 qdisc_reset(cl->q);
1942 qdisc_tree_decrease_qlen(cl->q, qlen);
1943
1944 if (cl->next_alive)
1945 cbq_deactivate_class(cl);
1946
1947 if (q->tx_borrowed == cl)
1948 q->tx_borrowed = q->tx_class;
1949 if (q->tx_class == cl) {
1950 q->tx_class = NULL;
1951 q->tx_borrowed = NULL;
1952 }
1953 #ifdef CONFIG_NET_CLS_ACT
1954 if (q->rx_class == cl)
1955 q->rx_class = NULL;
1956 #endif
1957
1958 cbq_unlink_class(cl);
1959 cbq_adjust_levels(cl->tparent);
1960 cl->defmap = 0;
1961 cbq_sync_defmap(cl);
1962
1963 cbq_rmprio(q, cl);
1964 sch_tree_unlock(sch);
1965
1966 BUG_ON(--cl->refcnt == 0);
1967 /*
1968 * This shouldn't happen: we "hold" one cops->get() when called
1969 * from tc_ctl_tclass; the destroy method is done from cops->put().
1970 */
1971
1972 return 0;
1973 }
1974
1975 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1976 {
1977 struct cbq_sched_data *q = qdisc_priv(sch);
1978 struct cbq_class *cl = (struct cbq_class *)arg;
1979
1980 if (cl == NULL)
1981 cl = &q->link;
1982
1983 return &cl->filter_list;
1984 }
1985
1986 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1987 u32 classid)
1988 {
1989 struct cbq_sched_data *q = qdisc_priv(sch);
1990 struct cbq_class *p = (struct cbq_class*)parent;
1991 struct cbq_class *cl = cbq_class_lookup(q, classid);
1992
1993 if (cl) {
1994 if (p && p->level <= cl->level)
1995 return 0;
1996 cl->filters++;
1997 return (unsigned long)cl;
1998 }
1999 return 0;
2000 }
2001
2002 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2003 {
2004 struct cbq_class *cl = (struct cbq_class*)arg;
2005
2006 cl->filters--;
2007 }
2008
2009 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2010 {
2011 struct cbq_sched_data *q = qdisc_priv(sch);
2012 struct cbq_class *cl;
2013 struct hlist_node *n;
2014 unsigned h;
2015
2016 if (arg->stop)
2017 return;
2018
2019 for (h = 0; h < q->clhash.hashsize; h++) {
2020 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2021 if (arg->count < arg->skip) {
2022 arg->count++;
2023 continue;
2024 }
2025 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2026 arg->stop = 1;
2027 return;
2028 }
2029 arg->count++;
2030 }
2031 }
2032 }
2033
2034 static const struct Qdisc_class_ops cbq_class_ops = {
2035 .graft = cbq_graft,
2036 .leaf = cbq_leaf,
2037 .qlen_notify = cbq_qlen_notify,
2038 .get = cbq_get,
2039 .put = cbq_put,
2040 .change = cbq_change_class,
2041 .delete = cbq_delete,
2042 .walk = cbq_walk,
2043 .tcf_chain = cbq_find_tcf,
2044 .bind_tcf = cbq_bind_filter,
2045 .unbind_tcf = cbq_unbind_filter,
2046 .dump = cbq_dump_class,
2047 .dump_stats = cbq_dump_class_stats,
2048 };
2049
2050 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2051 .next = NULL,
2052 .cl_ops = &cbq_class_ops,
2053 .id = "cbq",
2054 .priv_size = sizeof(struct cbq_sched_data),
2055 .enqueue = cbq_enqueue,
2056 .dequeue = cbq_dequeue,
2057 .peek = qdisc_peek_dequeued,
2058 .drop = cbq_drop,
2059 .init = cbq_init,
2060 .reset = cbq_reset,
2061 .destroy = cbq_destroy,
2062 .change = NULL,
2063 .dump = cbq_dump,
2064 .dump_stats = cbq_dump_stats,
2065 .owner = THIS_MODULE,
2066 };
2067
2068 static int __init cbq_module_init(void)
2069 {
2070 return register_qdisc(&cbq_qdisc_ops);
2071 }
2072 static void __exit cbq_module_exit(void)
2073 {
2074 unregister_qdisc(&cbq_qdisc_ops);
2075 }
2076 module_init(cbq_module_init)
2077 module_exit(cbq_module_exit)
2078 MODULE_LICENSE("GPL");