1 /*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
21
22
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
25
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
29
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
31
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
33 Parameters", 1996
34
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
37
38 -----------------------------------------------------------------------
39
40 The algorithm skeleton was taken from the NS simulator's cbq.cc.
41 If someone wants to check this code against the LBL version,
42 they should take into account that ONLY the skeleton was borrowed;
43 the implementation is different. In particular:
44
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than the MTU, which is always the case when real-time classes
48 have small rates. Note that the statement of [3] is
49 incomplete: delay may actually be estimated even if the class
50 per-round allotment is less than the MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
52
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
54
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
57
58
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me why I am wrong 8).
63
64 --- Linux has no EOI event, so we cannot estimate true class
65 idle time. The workaround is to consider the next dequeue event
66 as a sign that the previous packet is finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with clock integrator, this scheme looks
69 very close to an ideal solution. */
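/*
 * Illustrative numeric plug-in (not part of the original file) of the WRR
 * delay bound quoted in the comment above; the numbers are arbitrary.
 *
 *	MTU = 1500 bytes, B = 125000 bytes/s (1 Mbit/s), k = 2 classes,
 *	r_1 = r_2 = 0.25 (so r = 0.5), W = 3000 bytes.
 *
 * Then W*r_i = 750 bytes, [MTU/(W*r_i)] = 2, W*r = 1500 bytes, and
 *
 *	delay_i <= (2*1500 + 1500 + 2*1500)/125000 = 7500/125000 s = 60 ms.
 */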
70
71 struct cbq_sched_data;
72
73
74 struct cbq_class
75 {
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
78
79 /* Parameters */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
85 unsigned char police;
86 #endif
87
88 u32 defmap;
89
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
92 long offtime;
93 long minidle;
94 u32 avpkt;
95 struct qdisc_rate_table *R_tab;
96
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
100
101 /* General scheduler (WRR) parameters */
102 long allot;
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
105
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
111 parent otherwise */
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
114
115 struct Qdisc *q; /* Elementary queueing discipline */
116
117
118 /* Variables */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
124 */
125
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
128 long avgidle;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
135
136 struct tcf_proto *filter_list;
137
138 int refcnt;
139 int filters;
140
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
142 };
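/*
 * Illustrative note (not part of the original file): how the three parent-like
 * pointers above relate, as set up by cbq_set_lss() further below.  For a
 * class created with the "bounded" flag but without "isolated":
 *
 *	tparent = its parent in the class tree (always),
 *	borrow  = NULL      (TCF_CBQ_LSS_BOUNDED: the class may not borrow),
 *	share   = tparent   (no TCF_CBQ_LSS_ISOLATED: its usage is charged up
 *	                     to the parent by cbq_update()).
 */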
143
144 struct cbq_sched_data
145 {
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
149
150 struct cbq_class link;
151
152 unsigned activemask;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
154 with backlog */
155
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
158 #endif
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
161 int tx_len;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
164 unsigned pmask;
165
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
168 started when CBQ has
169 backlog, but cannot
170 transmit just now */
171 psched_tdiff_t wd_expires;
172 int toplevel;
173 u32 hgenerator;
174 };
175
176
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
178
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
181 {
182 struct Qdisc_class_common *clc;
183
184 clc = qdisc_class_find(&q->clhash, classid);
185 if (clc == NULL)
186 return NULL;
187 return container_of(clc, struct cbq_class, common);
188 }
189
190 #ifdef CONFIG_NET_CLS_ACT
191
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
194 {
195 struct cbq_class *cl, *new;
196
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
199 return new;
200
201 return NULL;
202 }
203
204 #endif
205
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
208 transparently.
209
210 Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
213 to the split node.
214 */
215
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
218 {
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
225
226 /*
227 * Step 1. If skb->priority points to one of our classes, use it.
228 */
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
231 return cl;
232
233 *qerr = NET_XMIT_BYPASS;
234 for (;;) {
235 int result = 0;
236 defmap = head->defaults;
237
238 /*
239 * Step 2+n. Apply classifier.
240 */
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
243 goto fallback;
244
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
250
251 if (cl == NULL || cl->level >= head->level)
252 goto fallback;
253 }
254
255 #ifdef CONFIG_NET_CLS_ACT
256 switch (result) {
257 case TC_ACT_QUEUED:
258 case TC_ACT_STOLEN:
259 *qerr = NET_XMIT_SUCCESS;
260 case TC_ACT_SHOT:
261 return NULL;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
264 }
265 #endif
266 if (cl->level == 0)
267 return cl;
268
269 /*
270 * Step 3+n. If the classifier selected a link sharing class,
271 * apply the agency-specific classifier.
272 * Repeat this procedure until we hit a leaf node.
273 */
274 head = cl;
275 }
276
277 fallback:
278 cl = head;
279
280 /*
281 * Step 4. No success...
282 */
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
286 return head;
287
288 return cl;
289 }
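/*
 * Illustrative walk-through (not part of the original file) of the procedure
 * above, for a hypothetical setup: a route classifier attached to the root
 * maps traffic to an inner "agency" class A (level > 0).
 *
 *   Step 1 fails (skb->priority does not name one of our classes), so the
 *   loop starts at the root.  The root's filters return A; since A is not a
 *   leaf, head becomes A and A's own filters run next.  If they return a
 *   leaf, that leaf is used; if they match nothing, A's defaults[] indexed
 *   by skb->priority & TC_PRIO_MAX (when skb->priority carries no major
 *   handle), falling back to TC_PRIO_BESTEFFORT, decides.
 */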
290
291 /*
292 A packet has just been enqueued on an empty class.
293 cbq_activate_class adds the class to the tail of the active class list
294 of its priority band.
295 */
296
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
298 {
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
302
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
305
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
309 } else {
310 cl->next_alive = cl;
311 q->activemask |= (1<<prio);
312 }
313 }
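/*
 * Illustrative sketch (not part of the original file): q->active[prio] points
 * at the TAIL of a circular, singly linked list threaded through next_alive,
 * so the head of the band is tail->next_alive.  The helper below only shows
 * the conventional way to walk the ring; its name is hypothetical.
 */
#if 0
static void cbq_walk_band_example(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *tail = q->active[prio];
	struct cbq_class *cl;

	if (tail == NULL)			/* no backlogged class in this band */
		return;

	cl = tail->next_alive;			/* head of the ring */
	do {
		/* visit cl here */
	} while ((cl = cl->next_alive) != tail->next_alive);
}
#endif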
314
315 /*
316 Unlink the class from the active chain.
317 Note that the same procedure is done directly in cbq_dequeue*
318 during the round-robin procedure.
319 */
320
321 static void cbq_deactivate_class(struct cbq_class *this)
322 {
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
327
328 do {
329 cl = cl_prev->next_alive;
330 if (cl == this) {
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
333
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
339 return;
340 }
341 }
342 return;
343 }
344 } while ((cl_prev = cl) != q->active[prio]);
345 }
346
347 static void
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
349 {
350 int toplevel = q->toplevel;
351
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
353 psched_time_t now;
354 psched_tdiff_t incr;
355
356 now = psched_get_time();
357 incr = now - q->now_rt;
358 now = q->now + incr;
359
360 do {
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
363 return;
364 }
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
366 }
367 }
368
369 static int
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
371 {
372 struct cbq_sched_data *q = qdisc_priv(sch);
373 int len = skb->len;
374 int uninitialized_var(ret);
375 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
376
377 #ifdef CONFIG_NET_CLS_ACT
378 q->rx_class = cl;
379 #endif
380 if (cl == NULL) {
381 if (ret == NET_XMIT_BYPASS)
382 sch->qstats.drops++;
383 kfree_skb(skb);
384 return ret;
385 }
386
387 #ifdef CONFIG_NET_CLS_ACT
388 cl->q->__parent = sch;
389 #endif
390 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
391 sch->q.qlen++;
392 sch->bstats.packets++;
393 sch->bstats.bytes+=len;
394 cbq_mark_toplevel(q, cl);
395 if (!cl->next_alive)
396 cbq_activate_class(cl);
397 return ret;
398 }
399
400 sch->qstats.drops++;
401 cbq_mark_toplevel(q, cl);
402 cl->qstats.drops++;
403 return ret;
404 }
405
406 static int
407 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
408 {
409 struct cbq_sched_data *q = qdisc_priv(sch);
410 struct cbq_class *cl;
411 int ret;
412
413 if ((cl = q->tx_class) == NULL) {
414 kfree_skb(skb);
415 sch->qstats.drops++;
416 return NET_XMIT_CN;
417 }
418 q->tx_class = NULL;
419
420 cbq_mark_toplevel(q, cl);
421
422 #ifdef CONFIG_NET_CLS_ACT
423 q->rx_class = cl;
424 cl->q->__parent = sch;
425 #endif
426 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
427 sch->q.qlen++;
428 sch->qstats.requeues++;
429 if (!cl->next_alive)
430 cbq_activate_class(cl);
431 return 0;
432 }
433 sch->qstats.drops++;
434 cl->qstats.drops++;
435 return ret;
436 }
437
438 /* Overlimit actions */
439
440 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
441
442 static void cbq_ovl_classic(struct cbq_class *cl)
443 {
444 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
445 psched_tdiff_t delay = cl->undertime - q->now;
446
447 if (!cl->delayed) {
448 delay += cl->offtime;
449
450 /*
451 The class goes to sleep, so it will have no
452 chance to work off its avgidle. Let's forgive it 8)
453
454 BTW cbq-2.0 has a bug in this
455 place; apparently they forgot to shift it by cl->ewma_log.
456 */
457 if (cl->avgidle < 0)
458 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
459 if (cl->avgidle < cl->minidle)
460 cl->avgidle = cl->minidle;
461 if (delay <= 0)
462 delay = 1;
463 cl->undertime = q->now + delay;
464
465 cl->xstats.overactions++;
466 cl->delayed = 1;
467 }
468 if (q->wd_expires == 0 || q->wd_expires > delay)
469 q->wd_expires = delay;
470
471 /* Dirty work! We must schedule wakeups based on
472 real available rate, rather than leaf rate,
473 which may be tiny (even zero).
474 */
475 if (q->toplevel == TC_CBQ_MAXLEVEL) {
476 struct cbq_class *b;
477 psched_tdiff_t base_delay = q->wd_expires;
478
479 for (b = cl->borrow; b; b = b->borrow) {
480 delay = b->undertime - q->now;
481 if (delay < base_delay) {
482 if (delay <= 0)
483 delay = 1;
484 base_delay = delay;
485 }
486 }
487
488 q->wd_expires = base_delay;
489 }
490 }
491
492 /* TC_CBQ_OVL_RCLASSIC: penalize, by offtime, classes in the hierarchy when
493 they go overlimit
494 */
495
496 static void cbq_ovl_rclassic(struct cbq_class *cl)
497 {
498 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
499 struct cbq_class *this = cl;
500
501 do {
502 if (cl->level > q->toplevel) {
503 cl = NULL;
504 break;
505 }
506 } while ((cl = cl->borrow) != NULL);
507
508 if (cl == NULL)
509 cl = this;
510 cbq_ovl_classic(cl);
511 }
512
513 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
514
515 static void cbq_ovl_delay(struct cbq_class *cl)
516 {
517 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
518 psched_tdiff_t delay = cl->undertime - q->now;
519
520 if (!cl->delayed) {
521 psched_time_t sched = q->now;
522 ktime_t expires;
523
524 delay += cl->offtime;
525 if (cl->avgidle < 0)
526 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
527 if (cl->avgidle < cl->minidle)
528 cl->avgidle = cl->minidle;
529 cl->undertime = q->now + delay;
530
531 if (delay > 0) {
532 sched += delay + cl->penalty;
533 cl->penalized = sched;
534 cl->cpriority = TC_CBQ_MAXPRIO;
535 q->pmask |= (1<<TC_CBQ_MAXPRIO);
536
537 expires = ktime_set(0, 0);
538 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
539 if (hrtimer_try_to_cancel(&q->delay_timer) &&
540 ktime_to_ns(ktime_sub(q->delay_timer.expires,
541 expires)) > 0)
542 q->delay_timer.expires = expires;
543 hrtimer_restart(&q->delay_timer);
544 cl->delayed = 1;
545 cl->xstats.overactions++;
546 return;
547 }
548 delay = 1;
549 }
550 if (q->wd_expires == 0 || q->wd_expires > delay)
551 q->wd_expires = delay;
552 }
553
554 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
555
556 static void cbq_ovl_lowprio(struct cbq_class *cl)
557 {
558 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
559
560 cl->penalized = q->now + cl->penalty;
561
562 if (cl->cpriority != cl->priority2) {
563 cl->cpriority = cl->priority2;
564 q->pmask |= (1<<cl->cpriority);
565 cl->xstats.overactions++;
566 }
567 cbq_ovl_classic(cl);
568 }
569
570 /* TC_CBQ_OVL_DROP: penalize class by dropping */
571
572 static void cbq_ovl_drop(struct cbq_class *cl)
573 {
574 if (cl->q->ops->drop)
575 if (cl->q->ops->drop(cl->q))
576 cl->qdisc->q.qlen--;
577 cl->xstats.overactions++;
578 cbq_ovl_classic(cl);
579 }
580
581 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
582 psched_time_t now)
583 {
584 struct cbq_class *cl;
585 struct cbq_class *cl_prev = q->active[prio];
586 psched_time_t sched = now;
587
588 if (cl_prev == NULL)
589 return 0;
590
591 do {
592 cl = cl_prev->next_alive;
593 if (now - cl->penalized > 0) {
594 cl_prev->next_alive = cl->next_alive;
595 cl->next_alive = NULL;
596 cl->cpriority = cl->priority;
597 cl->delayed = 0;
598 cbq_activate_class(cl);
599
600 if (cl == q->active[prio]) {
601 q->active[prio] = cl_prev;
602 if (cl == q->active[prio]) {
603 q->active[prio] = NULL;
604 return 0;
605 }
606 }
607
608 cl = cl_prev->next_alive;
609 } else if (sched - cl->penalized > 0)
610 sched = cl->penalized;
611 } while ((cl_prev = cl) != q->active[prio]);
612
613 return sched - now;
614 }
615
616 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
617 {
618 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
619 delay_timer);
620 struct Qdisc *sch = q->watchdog.qdisc;
621 psched_time_t now;
622 psched_tdiff_t delay = 0;
623 unsigned pmask;
624
625 now = psched_get_time();
626
627 pmask = q->pmask;
628 q->pmask = 0;
629
630 while (pmask) {
631 int prio = ffz(~pmask);
632 psched_tdiff_t tmp;
633
634 pmask &= ~(1<<prio);
635
636 tmp = cbq_undelay_prio(q, prio, now);
637 if (tmp > 0) {
638 q->pmask |= 1<<prio;
639 if (tmp < delay || delay == 0)
640 delay = tmp;
641 }
642 }
643
644 if (delay) {
645 ktime_t time;
646
647 time = ktime_set(0, 0);
648 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
649 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
650 }
651
652 sch->flags &= ~TCQ_F_THROTTLED;
653 netif_schedule_queue(sch->dev_queue);
654 return HRTIMER_NORESTART;
655 }
656
657 #ifdef CONFIG_NET_CLS_ACT
658 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
659 {
660 int len = skb->len;
661 struct Qdisc *sch = child->__parent;
662 struct cbq_sched_data *q = qdisc_priv(sch);
663 struct cbq_class *cl = q->rx_class;
664
665 q->rx_class = NULL;
666
667 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
668
669 cbq_mark_toplevel(q, cl);
670
671 q->rx_class = cl;
672 cl->q->__parent = sch;
673
674 if (cl->q->enqueue(skb, cl->q) == 0) {
675 sch->q.qlen++;
676 sch->bstats.packets++;
677 sch->bstats.bytes+=len;
678 if (!cl->next_alive)
679 cbq_activate_class(cl);
680 return 0;
681 }
682 sch->qstats.drops++;
683 return 0;
684 }
685
686 sch->qstats.drops++;
687 return -1;
688 }
689 #endif
690
691 /*
692 This is a mission-critical procedure.
693
694 We "regenerate" the toplevel cutoff if the transmitting class
695 has backlog and is not regulated. This is not part of the
696 original CBQ description, but it looks more reasonable.
697 It may well be wrong; the question needs further investigation.
698 */
699
700 static __inline__ void
701 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
702 struct cbq_class *borrowed)
703 {
704 if (cl && q->toplevel >= borrowed->level) {
705 if (cl->q->q.qlen > 1) {
706 do {
707 if (borrowed->undertime == PSCHED_PASTPERFECT) {
708 q->toplevel = borrowed->level;
709 return;
710 }
711 } while ((borrowed=borrowed->borrow) != NULL);
712 }
713 #if 0
714 /* It is not necessary now. Uncommenting it
715 will save CPU cycles, but decrease fairness.
716 */
717 q->toplevel = TC_CBQ_MAXLEVEL;
718 #endif
719 }
720 }
721
722 static void
723 cbq_update(struct cbq_sched_data *q)
724 {
725 struct cbq_class *this = q->tx_class;
726 struct cbq_class *cl = this;
727 int len = q->tx_len;
728
729 q->tx_class = NULL;
730
731 for ( ; cl; cl = cl->share) {
732 long avgidle = cl->avgidle;
733 long idle;
734
735 cl->bstats.packets++;
736 cl->bstats.bytes += len;
737
738 /*
739 (now - last) is the total time between packet right edges.
740 (last_pktlen/rate) is the "virtual" busy time, so that
741
742 idle = (now - last) - last_pktlen/rate
743 */
744
745 idle = q->now - cl->last;
746 if ((unsigned long)idle > 128*1024*1024) {
747 avgidle = cl->maxidle;
748 } else {
749 idle -= L2T(cl, len);
750
751 /* true_avgidle := (1-W)*true_avgidle + W*idle,
752 where W=2^{-ewma_log}. But cl->avgidle is scaled:
753 cl->avgidle == true_avgidle/W,
754 hence:
755 */
756 avgidle += idle - (avgidle>>cl->ewma_log);
757 }
758
759 if (avgidle <= 0) {
760 /* Overlimit or at-limit */
761
762 if (avgidle < cl->minidle)
763 avgidle = cl->minidle;
764
765 cl->avgidle = avgidle;
766
767 /* Calculate the expected time when this class
768 will be allowed to send.
769 That will occur when:
770 (1-W)*true_avgidle + W*delay = 0, i.e.
771 idle = (1/W - 1)*(-true_avgidle)
772 or
773 idle = (1 - W)*(-cl->avgidle);
774 */
775 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
776
777 /*
778 That is not all.
779 To maintain the rate allocated to the class,
780 we add to undertime the virtual clock time
781 needed to complete the transmitted packet.
782 (len/phys_bandwidth has already elapsed by
783 the time cbq_update is called.)
784 */
785
786 idle -= L2T(&q->link, len);
787 idle += L2T(cl, len);
788
789 cl->undertime = q->now + idle;
790 } else {
791 /* Underlimit */
792
793 cl->undertime = PSCHED_PASTPERFECT;
794 if (avgidle > cl->maxidle)
795 cl->avgidle = cl->maxidle;
796 else
797 cl->avgidle = avgidle;
798 }
799 cl->last = q->now;
800 }
801
802 cbq_update_toplevel(q, this, q->tx_borrowed);
803 }
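/*
 * Illustrative sketch (not part of the original file): the scaled EWMA step
 * performed above, in isolation.  With W = 2^(-ewma_log) and the convention
 * cl->avgidle == true_avgidle / W, the update
 *
 *	true_avgidle := (1 - W) * true_avgidle + W * idle
 *
 * reduces to the shift-only form below.  The helper name is hypothetical.
 */
#if 0
static long cbq_scaled_ewma_example(long scaled_avgidle, long idle,
				    unsigned char ewma_log)
{
	/* scaled += idle - W*scaled, where W*x is just x >> ewma_log */
	return scaled_avgidle + idle - (scaled_avgidle >> ewma_log);
}
#endif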
804
805 static __inline__ struct cbq_class *
806 cbq_under_limit(struct cbq_class *cl)
807 {
808 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
809 struct cbq_class *this_cl = cl;
810
811 if (cl->tparent == NULL)
812 return cl;
813
814 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
815 cl->delayed = 0;
816 return cl;
817 }
818
819 do {
820 /* This is a very suspicious place. Currently the overlimit
821 action is generated for non-bounded classes
822 only if the link is completely congested.
823 Though this agrees with the ancestor-only paradigm,
824 it looks very stupid. In particular,
825 it means that this chunk of code will either
826 never be called or will strongly amplify
827 burstiness. Dangerous, silly, and yet
828 no other solution exists.
829 */
830 if ((cl = cl->borrow) == NULL) {
831 this_cl->qstats.overlimits++;
832 this_cl->overlimit(this_cl);
833 return NULL;
834 }
835 if (cl->level > q->toplevel)
836 return NULL;
837 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
838
839 cl->delayed = 0;
840 return cl;
841 }
842
843 static __inline__ struct sk_buff *
844 cbq_dequeue_prio(struct Qdisc *sch, int prio)
845 {
846 struct cbq_sched_data *q = qdisc_priv(sch);
847 struct cbq_class *cl_tail, *cl_prev, *cl;
848 struct sk_buff *skb;
849 int deficit;
850
851 cl_tail = cl_prev = q->active[prio];
852 cl = cl_prev->next_alive;
853
854 do {
855 deficit = 0;
856
857 /* Start round */
858 do {
859 struct cbq_class *borrow = cl;
860
861 if (cl->q->q.qlen &&
862 (borrow = cbq_under_limit(cl)) == NULL)
863 goto skip_class;
864
865 if (cl->deficit <= 0) {
866 /* Class exhausted its allotment per
867 this round. Switch to the next one.
868 */
869 deficit = 1;
870 cl->deficit += cl->quantum;
871 goto next_class;
872 }
873
874 skb = cl->q->dequeue(cl->q);
875
876 /* The class did not give us any skb :-(
877 This can occur even if cl->q->q.qlen != 0,
878 e.g. if cl->q == "tbf"
879 */
880 if (skb == NULL)
881 goto skip_class;
882
883 cl->deficit -= skb->len;
884 q->tx_class = cl;
885 q->tx_borrowed = borrow;
886 if (borrow != cl) {
887 #ifndef CBQ_XSTATS_BORROWS_BYTES
888 borrow->xstats.borrows++;
889 cl->xstats.borrows++;
890 #else
891 borrow->xstats.borrows += skb->len;
892 cl->xstats.borrows += skb->len;
893 #endif
894 }
895 q->tx_len = skb->len;
896
897 if (cl->deficit <= 0) {
898 q->active[prio] = cl;
899 cl = cl->next_alive;
900 cl->deficit += cl->quantum;
901 }
902 return skb;
903
904 skip_class:
905 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
906 /* Class is empty or penalized.
907 Unlink it from active chain.
908 */
909 cl_prev->next_alive = cl->next_alive;
910 cl->next_alive = NULL;
911
912 /* Did cl_tail point to it? */
913 if (cl == cl_tail) {
914 /* Repair it! */
915 cl_tail = cl_prev;
916
917 /* Was it the last class in this band? */
918 if (cl == cl_tail) {
919 /* Kill the band! */
920 q->active[prio] = NULL;
921 q->activemask &= ~(1<<prio);
922 if (cl->q->q.qlen)
923 cbq_activate_class(cl);
924 return NULL;
925 }
926
927 q->active[prio] = cl_tail;
928 }
929 if (cl->q->q.qlen)
930 cbq_activate_class(cl);
931
932 cl = cl_prev;
933 }
934
935 next_class:
936 cl_prev = cl;
937 cl = cl->next_alive;
938 } while (cl_prev != cl_tail);
939 } while (deficit);
940
941 q->active[prio] = cl_prev;
942
943 return NULL;
944 }
945
946 static __inline__ struct sk_buff *
947 cbq_dequeue_1(struct Qdisc *sch)
948 {
949 struct cbq_sched_data *q = qdisc_priv(sch);
950 struct sk_buff *skb;
951 unsigned activemask;
952
953 activemask = q->activemask&0xFF;
954 while (activemask) {
955 int prio = ffz(~activemask);
956 activemask &= ~(1<<prio);
957 skb = cbq_dequeue_prio(sch, prio);
958 if (skb)
959 return skb;
960 }
961 return NULL;
962 }
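/*
 * Illustrative sketch (not part of the original file): the bit trick used in
 * cbq_dequeue_1() and cbq_undelay().  ffz() returns the index of the first
 * zero bit, so ffz(~mask) is the index of the lowest set bit of mask, i.e.
 * the best (numerically smallest) priority band that has backlog.  The
 * helper name is hypothetical.
 */
#if 0
static int cbq_best_active_prio_example(unsigned activemask)
{
	if (activemask == 0)
		return -1;			/* no band has backlog */
	return ffz(~activemask);		/* lowest set bit of activemask */
}
#endif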
963
964 static struct sk_buff *
965 cbq_dequeue(struct Qdisc *sch)
966 {
967 struct sk_buff *skb;
968 struct cbq_sched_data *q = qdisc_priv(sch);
969 psched_time_t now;
970 psched_tdiff_t incr;
971
972 now = psched_get_time();
973 incr = now - q->now_rt;
974
975 if (q->tx_class) {
976 psched_tdiff_t incr2;
977 /* Time integrator. We calculate the EOS time
978 by adding the expected packet transmission time.
979 If real time is greater, we warp the artificial clock,
980 so that:
981
982 cbq_time = max(real_time, work);
983 */
984 incr2 = L2T(&q->link, q->tx_len);
985 q->now += incr2;
986 cbq_update(q);
987 if ((incr -= incr2) < 0)
988 incr = 0;
989 }
990 q->now += incr;
991 q->now_rt = now;
992
993 for (;;) {
994 q->wd_expires = 0;
995
996 skb = cbq_dequeue_1(sch);
997 if (skb) {
998 sch->q.qlen--;
999 sch->flags &= ~TCQ_F_THROTTLED;
1000 return skb;
1001 }
1002
1003 /* All the classes are overlimit.
1004
1005 This is possible if:
1006
1007 1. The scheduler is empty.
1008 2. The toplevel cutoff inhibited borrowing.
1009 3. The root class is overlimit.
1010
1011 Reset conditions 2 and 3 and retry.
1012
1013 Note that NS and cbq-2.0 are buggy: picking
1014 an arbitrary class is appropriate for ancestor-only
1015 sharing, but not for the toplevel algorithm.
1016
1017 Our version is better but slower, because it requires
1018 two passes; that is unavoidable with top-level sharing.
1019 */
1020
1021 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1022 q->link.undertime == PSCHED_PASTPERFECT)
1023 break;
1024
1025 q->toplevel = TC_CBQ_MAXLEVEL;
1026 q->link.undertime = PSCHED_PASTPERFECT;
1027 }
1028
1029 /* No packets in the scheduler, or nobody wants to give them to us :-(
1030 Sigh... start the watchdog timer in the latter case. */
1031
1032 if (sch->q.qlen) {
1033 sch->qstats.overlimits++;
1034 if (q->wd_expires)
1035 qdisc_watchdog_schedule(&q->watchdog,
1036 now + q->wd_expires);
1037 }
1038 return NULL;
1039 }
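/*
 * Illustrative sketch (not part of the original file): the clock integrator
 * at the top of cbq_dequeue() in isolation.  The virtual clock is advanced by
 * the expected transmission time of the last packet and then, if real time
 * has run further ahead, warped up to it, so that cbq_time = max(real_time,
 * work).  All names below are hypothetical.
 */
#if 0
static psched_time_t cbq_integrate_clock_example(psched_time_t vclock,
						 psched_time_t last_real,
						 psched_time_t now_real,
						 psched_tdiff_t tx_time)
{
	psched_tdiff_t real_incr = now_real - last_real;

	vclock += tx_time;			/* expected end-of-service */
	if (real_incr > tx_time)		/* real time ran ahead of work */
		vclock += real_incr - tx_time;
	return vclock;
}
#endif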
1040
1041 /* CBQ class maintenance routines */
1042
1043 static void cbq_adjust_levels(struct cbq_class *this)
1044 {
1045 if (this == NULL)
1046 return;
1047
1048 do {
1049 int level = 0;
1050 struct cbq_class *cl;
1051
1052 if ((cl = this->children) != NULL) {
1053 do {
1054 if (cl->level > level)
1055 level = cl->level;
1056 } while ((cl = cl->sibling) != this->children);
1057 }
1058 this->level = level+1;
1059 } while ((this = this->tparent) != NULL);
1060 }
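/*
 * Illustrative example (not part of the original file): with a root class
 * that has two leaf children, cbq_adjust_levels(root) leaves the leaves at
 * level 0 and sets the root to max(children's level) + 1 = 1; adding a
 * grandchild under one of the leaves makes that class level 1 and pushes the
 * root to level 2.
 */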
1061
1062 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1063 {
1064 struct cbq_class *cl;
1065 struct hlist_node *n;
1066 unsigned int h;
1067
1068 if (q->quanta[prio] == 0)
1069 return;
1070
1071 for (h = 0; h < q->clhash.hashsize; h++) {
1072 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1073 /* BUGGGG... Beware! This expression suffers from
1074 arithmetic overflows!
1075 */
1076 if (cl->priority == prio) {
1077 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1078 q->quanta[prio];
1079 }
1080 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1081 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1082 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1083 }
1084 }
1085 }
1086 }
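/*
 * Illustrative numeric example (not part of the original file) of the quantum
 * formula above, with arbitrary numbers: two classes in the same priority
 * band, each with allot = 1514, with weights 1 and 3, so nclasses = 2 and
 * quanta (the sum of weights) = 4.  Then
 *
 *	quantum_1 = 1*1514*2/4 =  757
 *	quantum_2 = 3*1514*2/4 = 2271
 *
 * i.e. the per-round quanta keep the 1:3 weight ratio while their sum stays
 * at nclasses * allot = 3028 bytes per WRR round.
 */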
1087
1088 static void cbq_sync_defmap(struct cbq_class *cl)
1089 {
1090 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1091 struct cbq_class *split = cl->split;
1092 unsigned h;
1093 int i;
1094
1095 if (split == NULL)
1096 return;
1097
1098 for (i=0; i<=TC_PRIO_MAX; i++) {
1099 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1100 split->defaults[i] = NULL;
1101 }
1102
1103 for (i=0; i<=TC_PRIO_MAX; i++) {
1104 int level = split->level;
1105
1106 if (split->defaults[i])
1107 continue;
1108
1109 for (h = 0; h < q->clhash.hashsize; h++) {
1110 struct hlist_node *n;
1111 struct cbq_class *c;
1112
1113 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1114 common.hnode) {
1115 if (c->split == split && c->level < level &&
1116 c->defmap&(1<<i)) {
1117 split->defaults[i] = c;
1118 level = c->level;
1119 }
1120 }
1121 }
1122 }
1123 }
1124
1125 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1126 {
1127 struct cbq_class *split = NULL;
1128
1129 if (splitid == 0) {
1130 if ((split = cl->split) == NULL)
1131 return;
1132 splitid = split->common.classid;
1133 }
1134
1135 if (split == NULL || split->common.classid != splitid) {
1136 for (split = cl->tparent; split; split = split->tparent)
1137 if (split->common.classid == splitid)
1138 break;
1139 }
1140
1141 if (split == NULL)
1142 return;
1143
1144 if (cl->split != split) {
1145 cl->defmap = 0;
1146 cbq_sync_defmap(cl);
1147 cl->split = split;
1148 cl->defmap = def&mask;
1149 } else
1150 cl->defmap = (cl->defmap&~mask)|(def&mask);
1151
1152 cbq_sync_defmap(cl);
1153 }
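/*
 * Illustrative example (not part of the original file): defmap is a bitmask
 * over the logical priorities 0..TC_PRIO_MAX.  Passing, say,
 * def = mask = (1<<TC_PRIO_BESTEFFORT) | (1<<TC_PRIO_BULK) makes this class
 * the split node's defaults[] entry for those two priorities (unless a
 * lower-level class also claims them); this is what the "split"/"defmap"
 * parameters of tc(8) ultimately feed into this function.
 */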
1154
1155 static void cbq_unlink_class(struct cbq_class *this)
1156 {
1157 struct cbq_class *cl, **clp;
1158 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1159
1160 qdisc_class_hash_remove(&q->clhash, &this->common);
1161
1162 if (this->tparent) {
1163 clp=&this->sibling;
1164 cl = *clp;
1165 do {
1166 if (cl == this) {
1167 *clp = cl->sibling;
1168 break;
1169 }
1170 clp = &cl->sibling;
1171 } while ((cl = *clp) != this->sibling);
1172
1173 if (this->tparent->children == this) {
1174 this->tparent->children = this->sibling;
1175 if (this->sibling == this)
1176 this->tparent->children = NULL;
1177 }
1178 } else {
1179 BUG_TRAP(this->sibling == this);
1180 }
1181 }
1182
1183 static void cbq_link_class(struct cbq_class *this)
1184 {
1185 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1186 struct cbq_class *parent = this->tparent;
1187
1188 this->sibling = this;
1189 qdisc_class_hash_insert(&q->clhash, &this->common);
1190
1191 if (parent == NULL)
1192 return;
1193
1194 if (parent->children == NULL) {
1195 parent->children = this;
1196 } else {
1197 this->sibling = parent->children->sibling;
1198 parent->children->sibling = this;
1199 }
1200 }
1201
1202 static unsigned int cbq_drop(struct Qdisc* sch)
1203 {
1204 struct cbq_sched_data *q = qdisc_priv(sch);
1205 struct cbq_class *cl, *cl_head;
1206 int prio;
1207 unsigned int len;
1208
1209 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1210 if ((cl_head = q->active[prio]) == NULL)
1211 continue;
1212
1213 cl = cl_head;
1214 do {
1215 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1216 sch->q.qlen--;
1217 if (!cl->q->q.qlen)
1218 cbq_deactivate_class(cl);
1219 return len;
1220 }
1221 } while ((cl = cl->next_alive) != cl_head);
1222 }
1223 return 0;
1224 }
1225
1226 static void
1227 cbq_reset(struct Qdisc* sch)
1228 {
1229 struct cbq_sched_data *q = qdisc_priv(sch);
1230 struct cbq_class *cl;
1231 struct hlist_node *n;
1232 int prio;
1233 unsigned h;
1234
1235 q->activemask = 0;
1236 q->pmask = 0;
1237 q->tx_class = NULL;
1238 q->tx_borrowed = NULL;
1239 qdisc_watchdog_cancel(&q->watchdog);
1240 hrtimer_cancel(&q->delay_timer);
1241 q->toplevel = TC_CBQ_MAXLEVEL;
1242 q->now = psched_get_time();
1243 q->now_rt = q->now;
1244
1245 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1246 q->active[prio] = NULL;
1247
1248 for (h = 0; h < q->clhash.hashsize; h++) {
1249 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1250 qdisc_reset(cl->q);
1251
1252 cl->next_alive = NULL;
1253 cl->undertime = PSCHED_PASTPERFECT;
1254 cl->avgidle = cl->maxidle;
1255 cl->deficit = cl->quantum;
1256 cl->cpriority = cl->priority;
1257 }
1258 }
1259 sch->q.qlen = 0;
1260 }
1261
1262
1263 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1264 {
1265 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1266 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1267 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1268 }
1269 if (lss->change&TCF_CBQ_LSS_EWMA)
1270 cl->ewma_log = lss->ewma_log;
1271 if (lss->change&TCF_CBQ_LSS_AVPKT)
1272 cl->avpkt = lss->avpkt;
1273 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1274 cl->minidle = -(long)lss->minidle;
1275 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1276 cl->maxidle = lss->maxidle;
1277 cl->avgidle = lss->maxidle;
1278 }
1279 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1280 cl->offtime = lss->offtime;
1281 return 0;
1282 }
1283
1284 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1285 {
1286 q->nclasses[cl->priority]--;
1287 q->quanta[cl->priority] -= cl->weight;
1288 cbq_normalize_quanta(q, cl->priority);
1289 }
1290
1291 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1292 {
1293 q->nclasses[cl->priority]++;
1294 q->quanta[cl->priority] += cl->weight;
1295 cbq_normalize_quanta(q, cl->priority);
1296 }
1297
1298 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1299 {
1300 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1301
1302 if (wrr->allot)
1303 cl->allot = wrr->allot;
1304 if (wrr->weight)
1305 cl->weight = wrr->weight;
1306 if (wrr->priority) {
1307 cl->priority = wrr->priority-1;
1308 cl->cpriority = cl->priority;
1309 if (cl->priority >= cl->priority2)
1310 cl->priority2 = TC_CBQ_MAXPRIO-1;
1311 }
1312
1313 cbq_addprio(q, cl);
1314 return 0;
1315 }
1316
1317 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1318 {
1319 switch (ovl->strategy) {
1320 case TC_CBQ_OVL_CLASSIC:
1321 cl->overlimit = cbq_ovl_classic;
1322 break;
1323 case TC_CBQ_OVL_DELAY:
1324 cl->overlimit = cbq_ovl_delay;
1325 break;
1326 case TC_CBQ_OVL_LOWPRIO:
1327 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1328 ovl->priority2-1 <= cl->priority)
1329 return -EINVAL;
1330 cl->priority2 = ovl->priority2-1;
1331 cl->overlimit = cbq_ovl_lowprio;
1332 break;
1333 case TC_CBQ_OVL_DROP:
1334 cl->overlimit = cbq_ovl_drop;
1335 break;
1336 case TC_CBQ_OVL_RCLASSIC:
1337 cl->overlimit = cbq_ovl_rclassic;
1338 break;
1339 default:
1340 return -EINVAL;
1341 }
1342 cl->penalty = ovl->penalty;
1343 return 0;
1344 }
1345
1346 #ifdef CONFIG_NET_CLS_ACT
1347 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1348 {
1349 cl->police = p->police;
1350
1351 if (cl->q->handle) {
1352 if (p->police == TC_POLICE_RECLASSIFY)
1353 cl->q->reshape_fail = cbq_reshape_fail;
1354 else
1355 cl->q->reshape_fail = NULL;
1356 }
1357 return 0;
1358 }
1359 #endif
1360
1361 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1362 {
1363 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1364 return 0;
1365 }
1366
1367 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1368 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1369 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1370 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1371 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1372 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1373 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1374 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1375 };
1376
1377 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1378 {
1379 struct cbq_sched_data *q = qdisc_priv(sch);
1380 struct nlattr *tb[TCA_CBQ_MAX + 1];
1381 struct tc_ratespec *r;
1382 int err;
1383
1384 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1385 if (err < 0)
1386 return err;
1387
1388 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1389 return -EINVAL;
1390
1391 r = nla_data(tb[TCA_CBQ_RATE]);
1392
1393 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1394 return -EINVAL;
1395
1396 err = qdisc_class_hash_init(&q->clhash);
1397 if (err < 0)
1398 goto put_rtab;
1399
1400 q->link.refcnt = 1;
1401 q->link.sibling = &q->link;
1402 q->link.common.classid = sch->handle;
1403 q->link.qdisc = sch;
1404 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1405 &pfifo_qdisc_ops,
1406 sch->handle)))
1407 q->link.q = &noop_qdisc;
1408
1409 q->link.priority = TC_CBQ_MAXPRIO-1;
1410 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1411 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1412 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1413 q->link.overlimit = cbq_ovl_classic;
1414 q->link.allot = psched_mtu(qdisc_dev(sch));
1415 q->link.quantum = q->link.allot;
1416 q->link.weight = q->link.R_tab->rate.rate;
1417
1418 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1419 q->link.avpkt = q->link.allot/2;
1420 q->link.minidle = -0x7FFFFFFF;
1421
1422 qdisc_watchdog_init(&q->watchdog, sch);
1423 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1424 q->delay_timer.function = cbq_undelay;
1425 q->toplevel = TC_CBQ_MAXLEVEL;
1426 q->now = psched_get_time();
1427 q->now_rt = q->now;
1428
1429 cbq_link_class(&q->link);
1430
1431 if (tb[TCA_CBQ_LSSOPT])
1432 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1433
1434 cbq_addprio(q, &q->link);
1435 return 0;
1436
1437 put_rtab:
1438 qdisc_put_rtab(q->link.R_tab);
1439 return err;
1440 }
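/*
 * Orientation only (not part of the original file): a minimal configuration
 * in the style of the LARTC documentation that exercises this init path;
 * exact parameter spellings and defaults depend on the installed tc(8)
 * version.
 *
 *	tc qdisc add dev eth0 root handle 1: cbq bandwidth 10Mbit avpkt 1000
 *	tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 10Mbit \
 *		rate 1Mbit weight 100Kbit allot 1514 prio 5 maxburst 20 \
 *		avpkt 1000 bounded
 *
 * The qdisc line corresponds to cbq_init() above (TCA_CBQ_RATE/RTAB plus the
 * LSS options); the class line is handled by cbq_change_class() further below.
 */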
1441
1442 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1443 {
1444 unsigned char *b = skb_tail_pointer(skb);
1445
1446 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1447 return skb->len;
1448
1449 nla_put_failure:
1450 nlmsg_trim(skb, b);
1451 return -1;
1452 }
1453
1454 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1455 {
1456 unsigned char *b = skb_tail_pointer(skb);
1457 struct tc_cbq_lssopt opt;
1458
1459 opt.flags = 0;
1460 if (cl->borrow == NULL)
1461 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1462 if (cl->share == NULL)
1463 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1464 opt.ewma_log = cl->ewma_log;
1465 opt.level = cl->level;
1466 opt.avpkt = cl->avpkt;
1467 opt.maxidle = cl->maxidle;
1468 opt.minidle = (u32)(-cl->minidle);
1469 opt.offtime = cl->offtime;
1470 opt.change = ~0;
1471 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1472 return skb->len;
1473
1474 nla_put_failure:
1475 nlmsg_trim(skb, b);
1476 return -1;
1477 }
1478
1479 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1480 {
1481 unsigned char *b = skb_tail_pointer(skb);
1482 struct tc_cbq_wrropt opt;
1483
1484 opt.flags = 0;
1485 opt.allot = cl->allot;
1486 opt.priority = cl->priority+1;
1487 opt.cpriority = cl->cpriority+1;
1488 opt.weight = cl->weight;
1489 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1490 return skb->len;
1491
1492 nla_put_failure:
1493 nlmsg_trim(skb, b);
1494 return -1;
1495 }
1496
1497 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1498 {
1499 unsigned char *b = skb_tail_pointer(skb);
1500 struct tc_cbq_ovl opt;
1501
1502 opt.strategy = cl->ovl_strategy;
1503 opt.priority2 = cl->priority2+1;
1504 opt.pad = 0;
1505 opt.penalty = cl->penalty;
1506 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1507 return skb->len;
1508
1509 nla_put_failure:
1510 nlmsg_trim(skb, b);
1511 return -1;
1512 }
1513
1514 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1515 {
1516 unsigned char *b = skb_tail_pointer(skb);
1517 struct tc_cbq_fopt opt;
1518
1519 if (cl->split || cl->defmap) {
1520 opt.split = cl->split ? cl->split->common.classid : 0;
1521 opt.defmap = cl->defmap;
1522 opt.defchange = ~0;
1523 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1524 }
1525 return skb->len;
1526
1527 nla_put_failure:
1528 nlmsg_trim(skb, b);
1529 return -1;
1530 }
1531
1532 #ifdef CONFIG_NET_CLS_ACT
1533 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1534 {
1535 unsigned char *b = skb_tail_pointer(skb);
1536 struct tc_cbq_police opt;
1537
1538 if (cl->police) {
1539 opt.police = cl->police;
1540 opt.__res1 = 0;
1541 opt.__res2 = 0;
1542 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1543 }
1544 return skb->len;
1545
1546 nla_put_failure:
1547 nlmsg_trim(skb, b);
1548 return -1;
1549 }
1550 #endif
1551
1552 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1553 {
1554 if (cbq_dump_lss(skb, cl) < 0 ||
1555 cbq_dump_rate(skb, cl) < 0 ||
1556 cbq_dump_wrr(skb, cl) < 0 ||
1557 cbq_dump_ovl(skb, cl) < 0 ||
1558 #ifdef CONFIG_NET_CLS_ACT
1559 cbq_dump_police(skb, cl) < 0 ||
1560 #endif
1561 cbq_dump_fopt(skb, cl) < 0)
1562 return -1;
1563 return 0;
1564 }
1565
1566 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1567 {
1568 struct cbq_sched_data *q = qdisc_priv(sch);
1569 struct nlattr *nest;
1570
1571 nest = nla_nest_start(skb, TCA_OPTIONS);
1572 if (nest == NULL)
1573 goto nla_put_failure;
1574 if (cbq_dump_attr(skb, &q->link) < 0)
1575 goto nla_put_failure;
1576 nla_nest_end(skb, nest);
1577 return skb->len;
1578
1579 nla_put_failure:
1580 nla_nest_cancel(skb, nest);
1581 return -1;
1582 }
1583
1584 static int
1585 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1586 {
1587 struct cbq_sched_data *q = qdisc_priv(sch);
1588
1589 q->link.xstats.avgidle = q->link.avgidle;
1590 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1591 }
1592
1593 static int
1594 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1595 struct sk_buff *skb, struct tcmsg *tcm)
1596 {
1597 struct cbq_class *cl = (struct cbq_class*)arg;
1598 struct nlattr *nest;
1599
1600 if (cl->tparent)
1601 tcm->tcm_parent = cl->tparent->common.classid;
1602 else
1603 tcm->tcm_parent = TC_H_ROOT;
1604 tcm->tcm_handle = cl->common.classid;
1605 tcm->tcm_info = cl->q->handle;
1606
1607 nest = nla_nest_start(skb, TCA_OPTIONS);
1608 if (nest == NULL)
1609 goto nla_put_failure;
1610 if (cbq_dump_attr(skb, cl) < 0)
1611 goto nla_put_failure;
1612 nla_nest_end(skb, nest);
1613 return skb->len;
1614
1615 nla_put_failure:
1616 nla_nest_cancel(skb, nest);
1617 return -1;
1618 }
1619
1620 static int
1621 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1622 struct gnet_dump *d)
1623 {
1624 struct cbq_sched_data *q = qdisc_priv(sch);
1625 struct cbq_class *cl = (struct cbq_class*)arg;
1626
1627 cl->qstats.qlen = cl->q->q.qlen;
1628 cl->xstats.avgidle = cl->avgidle;
1629 cl->xstats.undertime = 0;
1630
1631 if (cl->undertime != PSCHED_PASTPERFECT)
1632 cl->xstats.undertime = cl->undertime - q->now;
1633
1634 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1635 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1636 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1637 return -1;
1638
1639 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1640 }
1641
1642 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1643 struct Qdisc **old)
1644 {
1645 struct cbq_class *cl = (struct cbq_class*)arg;
1646
1647 if (cl) {
1648 if (new == NULL) {
1649 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1650 &pfifo_qdisc_ops,
1651 cl->common.classid);
1652 if (new == NULL)
1653 return -ENOBUFS;
1654 } else {
1655 #ifdef CONFIG_NET_CLS_ACT
1656 if (cl->police == TC_POLICE_RECLASSIFY)
1657 new->reshape_fail = cbq_reshape_fail;
1658 #endif
1659 }
1660 sch_tree_lock(sch);
1661 *old = xchg(&cl->q, new);
1662 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1663 qdisc_reset(*old);
1664 sch_tree_unlock(sch);
1665
1666 return 0;
1667 }
1668 return -ENOENT;
1669 }
1670
1671 static struct Qdisc *
1672 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1673 {
1674 struct cbq_class *cl = (struct cbq_class*)arg;
1675
1676 return cl ? cl->q : NULL;
1677 }
1678
1679 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1680 {
1681 struct cbq_class *cl = (struct cbq_class *)arg;
1682
1683 if (cl->q->q.qlen == 0)
1684 cbq_deactivate_class(cl);
1685 }
1686
1687 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1688 {
1689 struct cbq_sched_data *q = qdisc_priv(sch);
1690 struct cbq_class *cl = cbq_class_lookup(q, classid);
1691
1692 if (cl) {
1693 cl->refcnt++;
1694 return (unsigned long)cl;
1695 }
1696 return 0;
1697 }
1698
1699 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1700 {
1701 struct cbq_sched_data *q = qdisc_priv(sch);
1702
1703 BUG_TRAP(!cl->filters);
1704
1705 tcf_destroy_chain(&cl->filter_list);
1706 qdisc_destroy(cl->q);
1707 qdisc_put_rtab(cl->R_tab);
1708 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1709 if (cl != &q->link)
1710 kfree(cl);
1711 }
1712
1713 static void
1714 cbq_destroy(struct Qdisc* sch)
1715 {
1716 struct cbq_sched_data *q = qdisc_priv(sch);
1717 struct hlist_node *n, *next;
1718 struct cbq_class *cl;
1719 unsigned h;
1720
1721 #ifdef CONFIG_NET_CLS_ACT
1722 q->rx_class = NULL;
1723 #endif
1724 /*
1725 * Filters must be destroyed first because we don't destroy the
1726 * classes from root to leaves, which means that filters can still
1727 * be bound to classes which have already been destroyed. --TGR '04
1728 */
1729 for (h = 0; h < q->clhash.hashsize; h++) {
1730 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1731 tcf_destroy_chain(&cl->filter_list);
1732 }
1733 for (h = 0; h < q->clhash.hashsize; h++) {
1734 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1735 common.hnode)
1736 cbq_destroy_class(sch, cl);
1737 }
1738 qdisc_class_hash_destroy(&q->clhash);
1739 }
1740
1741 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1742 {
1743 struct cbq_class *cl = (struct cbq_class*)arg;
1744
1745 if (--cl->refcnt == 0) {
1746 #ifdef CONFIG_NET_CLS_ACT
1747 struct cbq_sched_data *q = qdisc_priv(sch);
1748
1749 spin_lock_bh(&sch->dev_queue->lock);
1750 if (q->rx_class == cl)
1751 q->rx_class = NULL;
1752 spin_unlock_bh(&sch->dev_queue->lock);
1753 #endif
1754
1755 cbq_destroy_class(sch, cl);
1756 }
1757 }
1758
1759 static int
1760 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1761 unsigned long *arg)
1762 {
1763 int err;
1764 struct cbq_sched_data *q = qdisc_priv(sch);
1765 struct cbq_class *cl = (struct cbq_class*)*arg;
1766 struct nlattr *opt = tca[TCA_OPTIONS];
1767 struct nlattr *tb[TCA_CBQ_MAX + 1];
1768 struct cbq_class *parent;
1769 struct qdisc_rate_table *rtab = NULL;
1770
1771 if (opt == NULL)
1772 return -EINVAL;
1773
1774 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1775 if (err < 0)
1776 return err;
1777
1778 if (cl) {
1779 /* Check parent */
1780 if (parentid) {
1781 if (cl->tparent &&
1782 cl->tparent->common.classid != parentid)
1783 return -EINVAL;
1784 if (!cl->tparent && parentid != TC_H_ROOT)
1785 return -EINVAL;
1786 }
1787
1788 if (tb[TCA_CBQ_RATE]) {
1789 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1790 if (rtab == NULL)
1791 return -EINVAL;
1792 }
1793
1794 /* Change class parameters */
1795 sch_tree_lock(sch);
1796
1797 if (cl->next_alive != NULL)
1798 cbq_deactivate_class(cl);
1799
1800 if (rtab) {
1801 rtab = xchg(&cl->R_tab, rtab);
1802 qdisc_put_rtab(rtab);
1803 }
1804
1805 if (tb[TCA_CBQ_LSSOPT])
1806 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1807
1808 if (tb[TCA_CBQ_WRROPT]) {
1809 cbq_rmprio(q, cl);
1810 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1811 }
1812
1813 if (tb[TCA_CBQ_OVL_STRATEGY])
1814 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1815
1816 #ifdef CONFIG_NET_CLS_ACT
1817 if (tb[TCA_CBQ_POLICE])
1818 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1819 #endif
1820
1821 if (tb[TCA_CBQ_FOPT])
1822 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1823
1824 if (cl->q->q.qlen)
1825 cbq_activate_class(cl);
1826
1827 sch_tree_unlock(sch);
1828
1829 if (tca[TCA_RATE])
1830 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1831 &sch->dev_queue->lock,
1832 tca[TCA_RATE]);
1833 return 0;
1834 }
1835
1836 if (parentid == TC_H_ROOT)
1837 return -EINVAL;
1838
1839 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1840 tb[TCA_CBQ_LSSOPT] == NULL)
1841 return -EINVAL;
1842
1843 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1844 if (rtab == NULL)
1845 return -EINVAL;
1846
1847 if (classid) {
1848 err = -EINVAL;
1849 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1850 goto failure;
1851 } else {
1852 int i;
1853 classid = TC_H_MAKE(sch->handle,0x8000);
1854
1855 for (i=0; i<0x8000; i++) {
1856 if (++q->hgenerator >= 0x8000)
1857 q->hgenerator = 1;
1858 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1859 break;
1860 }
1861 err = -ENOSR;
1862 if (i >= 0x8000)
1863 goto failure;
1864 classid = classid|q->hgenerator;
1865 }
1866
1867 parent = &q->link;
1868 if (parentid) {
1869 parent = cbq_class_lookup(q, parentid);
1870 err = -EINVAL;
1871 if (parent == NULL)
1872 goto failure;
1873 }
1874
1875 err = -ENOBUFS;
1876 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1877 if (cl == NULL)
1878 goto failure;
1879 cl->R_tab = rtab;
1880 rtab = NULL;
1881 cl->refcnt = 1;
1882 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1883 &pfifo_qdisc_ops, classid)))
1884 cl->q = &noop_qdisc;
1885 cl->common.classid = classid;
1886 cl->tparent = parent;
1887 cl->qdisc = sch;
1888 cl->allot = parent->allot;
1889 cl->quantum = cl->allot;
1890 cl->weight = cl->R_tab->rate.rate;
1891
1892 sch_tree_lock(sch);
1893 cbq_link_class(cl);
1894 cl->borrow = cl->tparent;
1895 if (cl->tparent != &q->link)
1896 cl->share = cl->tparent;
1897 cbq_adjust_levels(parent);
1898 cl->minidle = -0x7FFFFFFF;
1899 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1900 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1901 if (cl->ewma_log==0)
1902 cl->ewma_log = q->link.ewma_log;
1903 if (cl->maxidle==0)
1904 cl->maxidle = q->link.maxidle;
1905 if (cl->avpkt==0)
1906 cl->avpkt = q->link.avpkt;
1907 cl->overlimit = cbq_ovl_classic;
1908 if (tb[TCA_CBQ_OVL_STRATEGY])
1909 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1910 #ifdef CONFIG_NET_CLS_ACT
1911 if (tb[TCA_CBQ_POLICE])
1912 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1913 #endif
1914 if (tb[TCA_CBQ_FOPT])
1915 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1916 sch_tree_unlock(sch);
1917
1918 qdisc_class_hash_grow(sch, &q->clhash);
1919
1920 if (tca[TCA_RATE])
1921 gen_new_estimator(&cl->bstats, &cl->rate_est,
1922 &sch->dev_queue->lock, tca[TCA_RATE]);
1923
1924 *arg = (unsigned long)cl;
1925 return 0;
1926
1927 failure:
1928 qdisc_put_rtab(rtab);
1929 return err;
1930 }
1931
1932 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1933 {
1934 struct cbq_sched_data *q = qdisc_priv(sch);
1935 struct cbq_class *cl = (struct cbq_class*)arg;
1936 unsigned int qlen;
1937
1938 if (cl->filters || cl->children || cl == &q->link)
1939 return -EBUSY;
1940
1941 sch_tree_lock(sch);
1942
1943 qlen = cl->q->q.qlen;
1944 qdisc_reset(cl->q);
1945 qdisc_tree_decrease_qlen(cl->q, qlen);
1946
1947 if (cl->next_alive)
1948 cbq_deactivate_class(cl);
1949
1950 if (q->tx_borrowed == cl)
1951 q->tx_borrowed = q->tx_class;
1952 if (q->tx_class == cl) {
1953 q->tx_class = NULL;
1954 q->tx_borrowed = NULL;
1955 }
1956 #ifdef CONFIG_NET_CLS_ACT
1957 if (q->rx_class == cl)
1958 q->rx_class = NULL;
1959 #endif
1960
1961 cbq_unlink_class(cl);
1962 cbq_adjust_levels(cl->tparent);
1963 cl->defmap = 0;
1964 cbq_sync_defmap(cl);
1965
1966 cbq_rmprio(q, cl);
1967 sch_tree_unlock(sch);
1968
1969 if (--cl->refcnt == 0)
1970 cbq_destroy_class(sch, cl);
1971
1972 return 0;
1973 }
1974
1975 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1976 {
1977 struct cbq_sched_data *q = qdisc_priv(sch);
1978 struct cbq_class *cl = (struct cbq_class *)arg;
1979
1980 if (cl == NULL)
1981 cl = &q->link;
1982
1983 return &cl->filter_list;
1984 }
1985
1986 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1987 u32 classid)
1988 {
1989 struct cbq_sched_data *q = qdisc_priv(sch);
1990 struct cbq_class *p = (struct cbq_class*)parent;
1991 struct cbq_class *cl = cbq_class_lookup(q, classid);
1992
1993 if (cl) {
1994 if (p && p->level <= cl->level)
1995 return 0;
1996 cl->filters++;
1997 return (unsigned long)cl;
1998 }
1999 return 0;
2000 }
2001
2002 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2003 {
2004 struct cbq_class *cl = (struct cbq_class*)arg;
2005
2006 cl->filters--;
2007 }
2008
2009 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2010 {
2011 struct cbq_sched_data *q = qdisc_priv(sch);
2012 struct cbq_class *cl;
2013 struct hlist_node *n;
2014 unsigned h;
2015
2016 if (arg->stop)
2017 return;
2018
2019 for (h = 0; h < q->clhash.hashsize; h++) {
2020 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2021 if (arg->count < arg->skip) {
2022 arg->count++;
2023 continue;
2024 }
2025 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2026 arg->stop = 1;
2027 return;
2028 }
2029 arg->count++;
2030 }
2031 }
2032 }
2033
2034 static const struct Qdisc_class_ops cbq_class_ops = {
2035 .graft = cbq_graft,
2036 .leaf = cbq_leaf,
2037 .qlen_notify = cbq_qlen_notify,
2038 .get = cbq_get,
2039 .put = cbq_put,
2040 .change = cbq_change_class,
2041 .delete = cbq_delete,
2042 .walk = cbq_walk,
2043 .tcf_chain = cbq_find_tcf,
2044 .bind_tcf = cbq_bind_filter,
2045 .unbind_tcf = cbq_unbind_filter,
2046 .dump = cbq_dump_class,
2047 .dump_stats = cbq_dump_class_stats,
2048 };
2049
2050 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2051 .next = NULL,
2052 .cl_ops = &cbq_class_ops,
2053 .id = "cbq",
2054 .priv_size = sizeof(struct cbq_sched_data),
2055 .enqueue = cbq_enqueue,
2056 .dequeue = cbq_dequeue,
2057 .requeue = cbq_requeue,
2058 .drop = cbq_drop,
2059 .init = cbq_init,
2060 .reset = cbq_reset,
2061 .destroy = cbq_destroy,
2062 .change = NULL,
2063 .dump = cbq_dump,
2064 .dump_stats = cbq_dump_stats,
2065 .owner = THIS_MODULE,
2066 };
2067
2068 static int __init cbq_module_init(void)
2069 {
2070 return register_qdisc(&cbq_qdisc_ops);
2071 }
2072 static void __exit cbq_module_exit(void)
2073 {
2074 unregister_qdisc(&cbq_qdisc_ops);
2075 }
2076 module_init(cbq_module_init)
2077 module_exit(cbq_module_exit)
2078 MODULE_LICENSE("GPL");