/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *		HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *		found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *		helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *		code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *		created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *		spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *		fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. Leaves ALWAYS have level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/

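/* A small illustrative sketch of the level numbering (example tree, not
   from the original source):

	root	(level TC_HTB_MAXDEPTH-1)
	`- inner	(one level below its parent)
	   `- leaf	(always level 0)
*/
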
static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

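/* Illustrative usage (assuming the module is built as sch_htb): mode 0640
 * makes the parameter writable at runtime, e.g.
 *	echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 * which trades some shaping accuracy for fewer mode transitions and thus
 * less CPU load.
 */
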
/* used internally to keep the status of a single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	struct Qdisc_class_common common;
	/* general class parameters */
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;		/* usage count of this class */

	/* topology */
	int level;		/* our level (see above) */
	unsigned int children;
	struct htb_class *parent;	/* parent class */

	int prio;		/* these two are used only by leaves... */
	int quantum;		/* but stored for parent-to-leaf return */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When class changes from state 1->2 and disconnects
			   from parent's feed then we lose the ptr value and
			   start from the first child again. Here we store the
			   classid of the last valid ptr (used when ptr is
			   NULL). */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;	/* node for event queue */
	psched_time_t pq_key;

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int warned;		/* only one warning about non work conserving .. */

	/* token bucket parameters */
	struct qdisc_rate_table *rate;	/* rate table of the class itself */
	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
	long buffer, cbuffer;	/* token bucket depth/rate */
	psched_tdiff_t mbuffer;	/* max wait time */
	long tokens, ctokens;	/* current number of tokens */
	psched_time_t t_c;	/* checkpoint time */
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct qdisc_watchdog watchdog;

	/* non shaped skbs; let them go directly thru */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful
 * we finish and return the direct queue.
 */
#define HTB_DIRECT	(struct htb_class *)-1

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	   note that nfmark can be used too by attaching filter fw with no
	   rules in it */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		if ((cl = (void *)res.class) == NULL) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			if ((cl = htb_find(res.classid, sch)) == NULL)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is a safe bet */
	return cl;
}

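/* Illustrative walk-through of the precedence above (example handles, not
 * from the original source): with "tc qdisc add dev eth0 root handle 1:
 * htb default 20", a packet with skb->priority set to 1:0 goes straight to
 * the direct queue, skb->priority set to 1:10 selects that leaf without
 * running filters, otherwise the filter chains decide, and an unclassified
 * packet falls back to leaf 1:20.
 */
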
/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, long delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}

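/* Note on the bit trick above: ffz(~mask) is the index of the lowest set
 * bit in mask, so e.g. mask == 0x6 visits prio 1 and then prio 2.
 */
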
/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}

/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);

		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);

		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for the priorities it participates in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use, so
				   reset the bit in mask as the parent is
				   already ok */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing a child which is pointed
				   to from the parent feed - forget the
				   pointer but remember the classid */
				p->un.inner.last_ptr_id[prio] = cl->common.classid;
				p->un.inner.ptr[prio] = NULL;
			}

			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline long htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline long htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to the time difference
 * from now to the time when cl will change its state.
 * Also it is worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0; rather there can be hysteresis in the
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
	long toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}

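/* Worked example with illustrative numbers: with htb_hysteresis enabled, a
 * class currently in HTB_CAN_SEND with buffer == 10000 keeps that mode
 * until tokens + diff falls below -10000 (htb_hiwater), instead of leaving
 * it the moment it crosses 0; likewise it only enters HTB_CANT_SEND once
 * ctokens + diff drops below -cbuffer (htb_lowater).
 */
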
/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine will update the feed list linkage, change
 * the mode and add the class to the wait event queue if appropriate. The
 * new mode should be different from the old one and cl->pq_key has to be
 * valid if changing to a mode other than HTB_CAN_SEND (see
 * htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called safely on an already active leaf.
 * It also adds the leaf into the droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->prio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with a non-active leaf. It also removes the class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int uninitialized_var(ret);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			kfree_skb(skb);
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			cl->qstats.drops++;
		}
		return ret;
	} else {
		cl->bstats.packets +=
			skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
		cl->bstats.bytes += qdisc_pkt_len(skb);
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	sch->bstats.packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
{
	long toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (long) qdisc_l2t(cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
{
	long toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (long) qdisc_l2t(cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such a case we remove the class from the event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	long diff;

	while (cl) {
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update byte stats except for leaves which are already updated */
		if (cl->level) {
			cl->bstats.bytes += bytes;
			cl->bstats.packets += skb_is_gso(skb) ?
					skb_shinfo(skb)->gso_segs : 1;
		}
		cl = cl->parent;
	}
}

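/* Example of the split above (illustrative): for a packet lent by a
 * level-1 parent to a level-0 leaf, the leaf (below the lending level) is
 * charged only on its ceil bucket (ctokens) and counted as a borrow, while
 * the parent at the lending level is charged on both its rate (tokens) and
 * ceil (ctokens) buckets and counted as a lend.
 */
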
/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq).
 * Note: Applied are events whose cl->pq_key <= q->now.
 */
static psched_time_t htb_do_events(struct htb_sched *q, int level)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	   1 to simplify things when jiffy is going to be incremented
	   too soon */
	unsigned long stop_at = jiffies + 2;
	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		long diff;
		struct rb_node *p = rb_first(&q->wait_pq[level]);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, q->wait_pq + level);
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}
	/* too much load - let's continue on the next jiffy */
	return q->now + PSCHED_TICKS_PER_SEC / HZ;
}

/* Returns class->node+prio from id-tree where class's id is >= id. NULL
   if no such one exists. */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find the leaf where the current feed pointers point.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 *pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			   the original or next ptr */
			*sp->pptr =
				htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now, so remove this hint as
				   it can become out of date quickly */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	WARN_ON(1);
	return NULL;
}

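/* The explicit stk[] above replaces recursion: one slot per tree level,
 * descending from the row's root through each inner class's feed[prio]
 * tree until a level-0 leaf in DRR position is reached.
 */
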
/* dequeues packet at given priority and level; call only if
   you are sure that there is active class at prio/level */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		   qdisc drops packets in enqueue routine or if someone used
		   graft operation on the leaf since last dequeue;
		   simply deactivate and skip such class */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;
		if (!cl->warned) {
			printk(KERN_WARNING
			       "htb: class %X isn't work conserving ?!\n",
			       cl->common.classid);
			cl->warned = 1;
		}

		htb_next_rb_node((level ? cl->parent->un.inner.ptr :
				  q->ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->un.leaf.deficit[level] < 0) {
			cl->un.leaf.deficit[level] += cl->quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr :
					  q->ptr[0]) + prio);
		}
		/* this used to be after charge_class but this constellation
		   gives us slightly better performance */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

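/* DRR accounting in short: each dequeued packet is charged against the
 * leaf's per-level deficit; once the deficit goes negative the leaf gets
 * another cl->quantum and the round-robin pointer advances, so bandwidth
 * within a prio is shared roughly in proportion to quantum.
 */
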
static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb = NULL;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	psched_time_t next_event;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
		sch->flags &= ~TCQ_F_THROTTLED;
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = psched_get_time();

	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		psched_time_t event;

		if (q->now >= q->near_ev_cache[level]) {
			event = htb_do_events(q, level);
			if (!event)
				event = q->now + PSCHED_TICKS_PER_SEC;
			q->near_ev_cache[level] = event;
		} else
			event = q->near_ev_cache[level];

		if (event && next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);
			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL)) {
				sch->q.qlen--;
				sch->flags &= ~TCQ_F_THROTTLED;
				goto fin;
			}
		}
	}
	sch->qstats.overlimits++;
	qdisc_watchdog_schedule(&q->watchdog, next_event);
fin:
	return skb;
}

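/* Note on the scan order above: levels are tried from 0 upward, so a leaf
 * that can send on its own tokens wins over one that must borrow from an
 * ancestor; within a level, ffz() picks the lowest prio number (highest
 * priority) first.
 */
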
/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_INIT + 1];
	struct tc_htb_glob *gopt;
	int err;
	int i;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
	if (err < 0)
		return err;

	if (tb[TCA_HTB_INIT] == NULL) {
		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
		return -EINVAL;
	}
	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16) {
		printk(KERN_ERR
		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
		return -EINVAL;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	qdisc_watchdog_init(&q->watchdog, sch);
	skb_queue_head_init(&q->direct_queue);

	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
		q->direct_qlen = 2;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	spin_lock_bh(root_lock);

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
	nla_nest_end(skb, nest);

	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	spin_lock_bh(root_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	opt.rate = cl->rate->rate;
	opt.buffer = cl->buffer;
	opt.ceil = cl->ceil->rate;
	opt.cbuffer = cl->cbuffer;
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = cl->tokens;
	cl->xstats.ctokens = cl->ctokens;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl && !cl->level) {
		if (new == NULL &&
		    (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					     &pfifo_qdisc_ops,
					     cl->common.classid))
		    == NULL)
			return -ENOBUFS;
		sch_tree_lock(sch);
		*old = cl->un.leaf.q;
		cl->un.leaf.q = new;
		if (*old != NULL) {
			qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
			qdisc_reset(*old);
		}
		sch_tree_unlock(sch);
		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->un.leaf.q->q.qlen == 0)
		htb_deactivate(qdisc_priv(sch), cl);
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);

	parent->level = 0;
	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = psched_get_time();
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->un.leaf.q);
		qdisc_destroy(cl->un.leaf.q);
	}
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	qdisc_put_rtab(cl->rate);
	qdisc_put_rtab(cl->ceil);

	tcf_destroy_chain(&cl->filter_list);
	kfree(cl);
}

/* always called under BH & queue lock */
static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct htb_class *cl;
	unsigned int i;

	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after the htb_destroy_class call below
	   and surprisingly it worked in 2.4. But it must precede it
	   because filters need their target class alive to be able to call
	   unbind_filter on it (without an Oops). */
	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
					  common.hnode)
			htb_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
	__skb_queue_purge(&q->direct_queue);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned int qlen;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	// TODO: why don't we allow deleting a subtree? references? does the
	// tc subsys guarantee us that in htb_destroy it holds no class
	// refs so that we can remove children safely there?
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					  &pfifo_qdisc_ops,
					  cl->parent->common.classid);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level) {
		qlen = cl->un.leaf.q->q.qlen;
		qdisc_reset(cl->un.leaf.q);
		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
	}

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
	struct nlattr *tb[TCA_HTB_RTAB + 1];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);

	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
	if (!rtab || !ctab)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr nla;
			struct gnet_estimator opt;
		} est = {
			.nla = {
				.nla_len = nla_attr_size(sizeof(est.opt)),
				.nla_type = TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval = 2,
				.ewma_log = 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle)
		    || htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			printk(KERN_ERR "htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
			goto failure;

		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE] ? : &est.nla);
		if (err) {
			kfree(cl);
			goto failure;
		}

		cl->refcnt = 1;
		cl->children = 0;
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		   so it can't be used inside of sch_tree_lock
		   -- thanks to Karlis Peisenieks */
		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					  &pfifo_qdisc_ops, classid);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			unsigned int qlen = parent->un.leaf.q->q.qlen;

			/* turn parent into inner node */
			qdisc_reset(parent->un.leaf.q);
			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->common.classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = hopt->buffer;
		cl->ctokens = hopt->cbuffer;
		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
		cl->t_c = psched_get_time();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
	} else {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err)
				return err;
		}
		sch_tree_lock(sch);
	}

	/* there used to be a nasty bug here: we have to check that the node
	   is really a leaf before changing cl->un.leaf ! */
	if (!cl->level) {
		cl->quantum = rtab->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->quantum < 1000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is small. Consider r2q change.\n",
			       cl->common.classid);
			cl->quantum = 1000;
		}
		if (!hopt->quantum && cl->quantum > 200000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is big. Consider r2q change.\n",
			       cl->common.classid);
			cl->quantum = 200000;
		}
		if (hopt->quantum)
			cl->quantum = hopt->quantum;
		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->prio = TC_HTB_NUMPRIO - 1;
	}

	cl->buffer = hopt->buffer;
	cl->cbuffer = hopt->cbuffer;
	if (cl->rate)
		qdisc_put_rtab(cl->rate);
	cl->rate = rtab;
	if (cl->ceil)
		qdisc_put_rtab(cl->ceil);
	cl->ceil = ctab;
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ctab)
		qdisc_put_rtab(ctab);
	return err;
}

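/* Illustrative tc usage exercising this handler (example device and
 * handles, not from the original source):
 *	tc qdisc add dev eth0 root handle 1: htb default 20 r2q 10
 *	tc class add dev eth0 parent 1: classid 1:1 htb rate 1mbit ceil 2mbit
 * With r2q 10 and no explicit quantum, a 1mbit class (125000 bytes/s) gets
 * quantum 12500, inside the warned-about [1000, 200000] range.
 */
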
static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

	return fl;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	   The line above used to be there to prevent attaching filters to
	   leaves. But at least the tc_index filter uses this just to get a
	   class for other reasons, so we have to allow for it.
	   ----
	   19.6.2002 As Werner explained it is ok - bind filter is just
	   another way to "lock" the class - unlike "get" this lock can
	   be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.graft		= htb_graft,
	.leaf		= htb_leaf,
	.qlen_notify	= htb_qlen_notify,
	.get		= htb_get,
	.put		= htb_put,
	.change		= htb_change_class,
	.delete		= htb_delete,
	.walk		= htb_walk,
	.tcf_chain	= htb_find_tcf,
	.bind_tcf	= htb_bind_filter,
	.unbind_tcf	= htb_unbind_filter,
	.dump		= htb_dump_class,
	.dump_stats	= htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &htb_class_ops,
	.id		= "htb",
	.priv_size	= sizeof(struct htb_sched),
	.enqueue	= htb_enqueue,
	.dequeue	= htb_dequeue,
	.peek		= qdisc_peek_dequeued,
	.drop		= htb_drop,
	.init		= htb_init,
	.reset		= htb_reset,
	.destroy	= htb_destroy,
	.change		= NULL /* htb_change */,
	.dump		= htb_dump,
	.owner		= THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");