/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. Leaves always have level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
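
/* For example, with TC_HTB_MAXDEPTH == 8 the root class sits at level 7
   and an interior child of the root at level 6, while leaves are always
   level 0. An illustrative userspace setup (the classids here are only
   an example, nothing this file mandates):

	tc qdisc add dev eth0 root handle 1: htb default 12
	tc class add dev eth0 parent 1: classid 1:1 htb rate 100kbps ceil 100kbps
	tc class add dev eth0 parent 1:1 classid 1:12 htb rate 30kbps ceil 100kbps

   Unclassified traffic then falls into class 1:12 (see q->defcls below). */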

#define HTB_HSIZE 16		/* classid hash size */
static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param (htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

/* used internally to keep status of single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	/* general class parameters */
	u32 classid;
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;		/* usage count of this class */

	/* topology */
	int level;		/* our level (see above) */
	struct htb_class *parent;	/* parent class */
	struct hlist_node hlist;	/* classid hash list item */
	struct list_head sibling;	/* sibling list item */
	struct list_head children;	/* children list */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int prio;
			int aprio;
			int quantum;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When class changes from state 1->2 and disconnects from
			   parent's feed then we lose the ptr value and start from
			   the first child again. Here we store classid of the
			   last valid ptr (used when ptr is NULL). */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;	/* node for event queue */
	psched_time_t pq_key;

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int warned;		/* only one warning about non work conserving .. */

	/* token bucket parameters */
	struct qdisc_rate_table *rate;	/* rate table of the class itself */
	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
	long buffer, cbuffer;	/* token bucket depth/rate */
	psched_tdiff_t mbuffer;	/* max wait time */
	long tokens, ctokens;	/* current number of tokens */
	psched_time_t t_c;	/* checkpoint time */

	int prio;		/* For parent to leaf return possible here */
	int quantum;		/* we do backup. Finally full replacement */
				/* of un.leaf originals should be done. */
};

static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
		       int size)
{
	long result = qdisc_l2t(rate, size);
	return result;
}
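
/* L2T ("length to time") converts a packet length in bytes into
   transmission time in psched ticks at the given rate, via the rate
   table computed by userspace tc. Rough illustrative numbers: at
   125000 bytes/s (1 Mbit/s), a 1500-byte packet costs
   1500/125000 = 12 ms worth of tokens. */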

struct htb_sched {
	struct list_head root;	/* root classes list */
	struct hlist_head hash[HTB_HSIZE];	/* hashed by classid */
	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];

	/* whether we hit non-work conserving class during this dequeue; we use */
	int nwc_hit;	/* this to disable mindelay complaint in dequeue */

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct qdisc_watchdog watchdog;

	/* non shaped skbs; let them go directly thru */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;
};

/* compute hash of size HTB_HSIZE for given handle */
static inline int htb_hash(u32 h)
{
#if HTB_HSIZE != 16
#error "Declare new hash for your HTB_HSIZE"
#endif
	h ^= h >> 8;	/* stolen from cbq_hash */
	h ^= h >> 4;
	return h & 0xf;
}
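
/* Worked example: classid 1:12 is handle 0x10012; 0x10012 ^ (0x10012 >> 8)
   = 0x10112, then 0x10112 ^ (0x10112 >> 4) = 0x11103, and 0x11103 & 0xf = 3,
   so the class lands in bucket 3. The low 16 bits of the handle all
   influence the 4-bit bucket index. */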

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *p;
	struct htb_class *cl;

	if (TC_H_MAJ(handle) != sch->handle)
		return NULL;

	hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
		if (cl->classid == handle)
			return cl;
	}
	return NULL;
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If that is still
 * unsuccessful we finish and return the direct queue.
 */
#define HTB_DIRECT (struct htb_class*)-1

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	   note that nfmark can be used too by attaching filter fw with no
	   rules in it */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		if ((cl = (void *)res.class) == NULL) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			if ((cl = htb_find(res.classid, sch)) == NULL)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}
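
/* Classification by example: with sch->handle 0x10000 and defcls 0x12
   (tc "default 12"), an skb whose priority equals 0x10000 goes straight
   to the direct queue; priority 0x10012 selects leaf 1:12 immediately;
   otherwise the filter chains run, and on failure leaf 1:12 is used as
   long as it exists and really is a leaf. */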

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->classid > c->classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, long delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}


/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);

		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);

		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use so that
				   reset bit in mask as parent is already ok */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent the old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing child which is pointed to from
				   parent feed - forget the pointer but remember
				   classid */
				p->un.inner.last_ptr_id[prio] = cl->classid;
				p->un.inner.ptr[prio] = NULL;
			}

			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline long htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline long htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}


/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth to note that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
	long toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}
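
/* Mode computation by example: with ctokens = -5 and *diff = 2 (both in
   psched ticks), ctokens + *diff = -3 is below the low-water mark of 0
   (hysteresis off), so the class is HTB_CANT_SEND and *diff becomes 3,
   the time needed for the ceil bucket to refill to zero. If the ceil
   test passes but tokens + *diff is still short of the high-water mark,
   the class may only borrow (HTB_MAY_BORROW). With htb_hysteresis the
   marks shift toward -cbuffer/-buffer, trading accuracy for fewer mode
   transitions. */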

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way how to change class's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->un.leaf.aprio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	BUG_TRAP(cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}
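
/* prio_activity is a bitmask over the TC_HTB_NUMPRIO priorities. A leaf
   is active on exactly one of them: a leaf with prio 2 and packets queued
   gets prio_activity = 1 << 2 = 0x4, while an inner class may accumulate
   several bits, one per priority on which some descendant leaf feeds it. */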

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int ret;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			kfree_skb(skb);
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
		   NET_XMIT_SUCCESS) {
		sch->qstats.drops++;
		cl->qstats.drops++;
		return NET_XMIT_DROP;
	} else {
		cl->bstats.packets +=
			skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
		cl->bstats.bytes += skb->len;
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	sch->bstats.packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
	sch->bstats.bytes += skb->len;
	return NET_XMIT_SUCCESS;
}

/* TODO: requeuing packet charges it to policers again !! */
static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	int ret;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);
	struct sk_buff *tskb;

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_head(&q->direct_queue, skb);
		} else {
			__skb_queue_head(&q->direct_queue, skb);
			tskb = __skb_dequeue_tail(&q->direct_queue);
			kfree_skb(tskb);
			sch->qstats.drops++;
			return NET_XMIT_CN;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
		   NET_XMIT_SUCCESS) {
		sch->qstats.drops++;
		cl->qstats.drops++;
		return NET_XMIT_DROP;
	} else
		htb_activate(q, cl);

	sch->q.qlen++;
	sch->qstats.requeues++;
	return NET_XMIT_SUCCESS;
}

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock than event queue here.
 * In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = skb->len;
	long toks, diff;
	enum htb_cmode old_mode;

#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
	if (toks > cl->B) toks = cl->B; \
	toks -= L2T(cl, cl->R, bytes); \
	if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
	cl->T = toks
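
	/* HTB_ACCNT is the usual token bucket update: refill by the elapsed
	 * time diff, clamp to the bucket depth B, charge the packet's
	 * transmission time at rate R, and floor at -mbuffer. Illustrative
	 * numbers: with tokens = 0, diff worth 20 ms and a 1500-byte packet
	 * costing 12 ms at the class rate, the new token count is 8 ms
	 * worth of ticks (assuming that is below the burst cap). */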

	while (cl) {
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			HTB_ACCNT(tokens, buffer, rate);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		HTB_ACCNT(ctokens, cbuffer, ceil);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update byte stats except for leaves which are already updated */
		if (cl->level) {
			cl->bstats.bytes += bytes;
			cl->bstats.packets += skb_is_gso(skb) ?
					skb_shinfo(skb)->gso_segs : 1;
		}
		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq).
 * Note: only events with cl->pq_key <= q->now are applied.
 */
static psched_time_t htb_do_events(struct htb_sched *q, int level)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	   1 to simplify things when jiffy is going to be incremented
	   too soon */
	unsigned long stop_at = jiffies + 2;
	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		long diff;
		struct rb_node *p = rb_first(&q->wait_pq[level]);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, q->wait_pq + level);
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}
	/* too much load - let's continue on next jiffie */
	return q->now + PSCHED_TICKS_PER_SEC / HZ;
}

/* Returns class->node+prio from id-tree where the class's id is >= id. NULL
   if no such one exists. */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);
		if (id == cl->classid)
			return n;

		if (id > cl->classid) {
			n = n->rb_right;
		} else {
			r = n;
			n = n->rb_left;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find the leaf the current feed pointer points to.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 *pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_TRAP(tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			   the original or next ptr */
			*sp->pptr =
				htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
				   can become out of date quickly */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				BUG_TRAP(*sp->pptr);
				if (!*sp->pptr)
					return NULL;
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	BUG_TRAP(0);
	return NULL;
}
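
/* htb_lookup_leaf walks an explicit stack of (feed tree root, ptr,
   last_ptr_id) frames, one per hierarchy level, instead of recursing:
   at each inner class it descends into the per-priority feed tree, and
   where a round-robin pointer was invalidated (set to NULL in
   htb_deactivate_prios) it resumes from the remembered classid via
   htb_id_find_next_upper. The 65535-iteration bound is only a safety
   net against a corrupted tree. */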

/* dequeues packet at given priority and level; call only if
   you are sure that there is active class at prio/level */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		BUG_TRAP(cl);
		if (!cl)
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		   qdisc drops packets in enqueue routine or if someone used
		   graft operation on the leaf since last dequeue;
		   simply deactivate and skip such class */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;
		if (!cl->warned) {
			printk(KERN_WARNING
			       "htb: class %X isn't work conserving ?!\n",
			       cl->classid);
			cl->warned = 1;
		}
		q->nwc_hit++;
		htb_next_rb_node((level ? cl->parent->un.inner.ptr :
				  q->ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr :
					  q->ptr[0]) + prio);
		}
		/* this used to be after charge_class but this constellation
		   gives us slightly better performance */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}
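
/* The deficit accounting above is DRR: each dequeued packet is charged
   against the leaf's per-level deficit, and once the deficit goes
   negative the leaf receives another quantum and the round-robin pointer
   advances. With quantum 3000 and 1500-byte packets, for instance, a
   leaf sends on average two packets per turn before yielding to its
   siblings on the same priority. */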

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb = NULL;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	psched_time_t next_event;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
		sch->flags &= ~TCQ_F_THROTTLED;
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = psched_get_time();

	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
	q->nwc_hit = 0;
	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		psched_time_t event;

		if (q->now >= q->near_ev_cache[level]) {
			event = htb_do_events(q, level);
			if (!event)
				event = q->now + PSCHED_TICKS_PER_SEC;
			q->near_ev_cache[level] = event;
		} else
			event = q->near_ev_cache[level];

		if (event && next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);
			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL)) {
				sch->q.qlen--;
				sch->flags &= ~TCQ_F_THROTTLED;
				goto fin;
			}
		}
	}
	sch->qstats.overlimits++;
	qdisc_watchdog_schedule(&q->watchdog, next_event);
fin:
	return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int i;

	for (i = 0; i < HTB_HSIZE; i++) {
		struct hlist_node *p;
		struct htb_class *cl;

		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;

		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_INIT + 1];
	struct tc_htb_glob *gopt;
	int err;
	int i;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
	if (err < 0)
		return err;

	if (tb[TCA_HTB_INIT] == NULL) {
		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
		return -EINVAL;
	}
	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16) {
		printk(KERN_ERR
		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
		return -EINVAL;
	}

	INIT_LIST_HEAD(&q->root);
	for (i = 0; i < HTB_HSIZE; i++)
		INIT_HLIST_HEAD(q->hash + i);
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	qdisc_watchdog_init(&q->watchdog, sch);
	skb_queue_head_init(&q->direct_queue);

	q->direct_qlen = sch->dev->tx_queue_len;
	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
		q->direct_qlen = 2;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	spin_lock_bh(&sch->dev->queue_lock);

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
	nla_nest_end(skb, nest);

	spin_unlock_bh(&sch->dev->queue_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(&sch->dev->queue_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	struct nlattr *nest;
	struct tc_htb_opt opt;

	spin_lock_bh(&sch->dev->queue_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	opt.rate = cl->rate->rate;
	opt.buffer = cl->buffer;
	opt.ceil = cl->ceil->rate;
	opt.cbuffer = cl->cbuffer;
	opt.quantum = cl->un.leaf.quantum;
	opt.prio = cl->un.leaf.prio;
	opt.level = cl->level;
	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	spin_unlock_bh(&sch->dev->queue_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(&sch->dev->queue_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = cl->tokens;
	cl->xstats.ctokens = cl->ctokens;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl && !cl->level) {
		if (new == NULL &&
		    (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
					     cl->classid)) == NULL)
			return -ENOBUFS;
		sch_tree_lock(sch);
		if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
			qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
			qdisc_reset(*old);
		}
		sch_tree_unlock(sch);
		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->un.leaf.q->q.qlen == 0)
		htb_deactivate(qdisc_priv(sch), cl);
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;

	if (!(cl->parent->children.next == &cl->sibling &&
	      cl->parent->children.prev == &cl->sibling))
		/* not the last child */
		return 0;

	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);

	parent->level = 0;
	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
	parent->un.leaf.quantum = parent->quantum;
	parent->un.leaf.prio = parent->prio;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = psched_get_time();
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	struct htb_sched *q = qdisc_priv(sch);

	if (!cl->level) {
		BUG_TRAP(cl->un.leaf.q);
		qdisc_destroy(cl->un.leaf.q);
	}
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	qdisc_put_rtab(cl->rate);
	qdisc_put_rtab(cl->ceil);

	tcf_destroy_chain(&cl->filter_list);

	while (!list_empty(&cl->children))
		htb_destroy_class(sch, list_entry(cl->children.next,
						  struct htb_class, sibling));

	/* note: this delete may happen twice (see htb_delete) */
	hlist_del_init(&cl->hlist);
	list_del(&cl->sibling);

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	kfree(cl);
}

/* always called under BH & queue lock */
static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after htb_destroy_class call below
	   and surprisingly it worked in 2.4. But it must precede it
	   because filters need their target class alive to be able to call
	   unbind_filter on it (without an Oops). */
	tcf_destroy_chain(&q->filter_list);

	while (!list_empty(&q->root))
		htb_destroy_class(sch, list_entry(q->root.next,
						  struct htb_class, sibling));

	__skb_queue_purge(&q->direct_queue);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned int qlen;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	// TODO: why don't we allow deleting a subtree? references? does the
	// tc subsystem guarantee us that in htb_destroy it holds no class
	// refs so that we can remove children safely there?
	if (!list_empty(&cl->children) || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
					  cl->parent->classid);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level) {
		qlen = cl->un.leaf.q->q.qlen;
		qdisc_reset(cl->un.leaf.q);
		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
	}

	/* delete from hash and active; remainder in destroy_class */
	hlist_del_init(&cl->hlist);

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
	struct nlattr *tb[TCA_HTB_RTAB + 1];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);

	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
	if (!rtab || !ctab)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr nla;
			struct gnet_estimator opt;
		} est = {
			.nla = {
				.nla_len = nla_attr_size(sizeof(est.opt)),
				.nla_type = TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval = 2,
				.ewma_log = 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle)
		    || htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			printk(KERN_ERR "htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
			goto failure;

		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  &sch->dev->queue_lock,
				  tca[TCA_RATE] ? : &est.nla);
		cl->refcnt = 1;
		INIT_LIST_HEAD(&cl->sibling);
		INIT_HLIST_NODE(&cl->hlist);
		INIT_LIST_HEAD(&cl->children);
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		   so that it can't be used inside of sch_tree_lock
		   -- thanks to Karlis Peisenieks */
		new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			unsigned int qlen = parent->un.leaf.q->q.qlen;

			/* turn parent into inner node */
			qdisc_reset(parent->un.leaf.q);
			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = hopt->buffer;
		cl->ctokens = hopt->cbuffer;
		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
		cl->t_c = psched_get_time();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
		list_add_tail(&cl->sibling,
			      parent ? &parent->children : &q->root);
	} else {
		if (tca[TCA_RATE])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      &sch->dev->queue_lock,
					      tca[TCA_RATE]);
		sch_tree_lock(sch);
	}

	/* it used to be a nasty bug here, we have to check that node
	   is really a leaf before changing cl->un.leaf ! */
	if (!cl->level) {
		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is small. Consider r2q change.\n",
			       cl->classid);
			cl->un.leaf.quantum = 1000;
		}
		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is big. Consider r2q change.\n",
			       cl->classid);
			cl->un.leaf.quantum = 200000;
		}
		if (hopt->quantum)
			cl->un.leaf.quantum = hopt->quantum;
		if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;

		/* backup for htb_parent_to_leaf */
		cl->quantum = cl->un.leaf.quantum;
		cl->prio = cl->un.leaf.prio;
	}

	cl->buffer = hopt->buffer;
	cl->cbuffer = hopt->cbuffer;
	if (cl->rate)
		qdisc_put_rtab(cl->rate);
	cl->rate = rtab;
	if (cl->ceil)
		qdisc_put_rtab(cl->ceil);
	cl->ceil = ctab;
	sch_tree_unlock(sch);

	*arg = (unsigned long)cl;
	return 0;

failure:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ctab)
		qdisc_put_rtab(ctab);
	return err;
}
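
/* Quantum selection in htb_change_class, by example: unless the user
   supplies one, quantum = rate_in_bytes_per_sec / r2q. With rate 800kbit
   (100000 B/s) and tc's default r2q of 10, quantum becomes 10000 bytes;
   results outside [1000, 200000] are clamped with the "Consider r2q
   change" warnings above. */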

static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

	return fl;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	   The line above used to be there to prevent attaching filters to
	   leaves. But at least tc_index filter uses this just to get class
	   for other reasons so that we have to allow for it.
	   ----
	   19.6.2002 As Werner explained it is ok - bind filter is just
	   another way to "lock" the class - unlike "get" this lock can
	   be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	else
		q->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
	else
		q->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	int i;

	if (arg->stop)
		return;

	for (i = 0; i < HTB_HSIZE; i++) {
		struct hlist_node *p;
		struct htb_class *cl;

		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.graft = htb_graft,
	.leaf = htb_leaf,
	.qlen_notify = htb_qlen_notify,
	.get = htb_get,
	.put = htb_put,
	.change = htb_change_class,
	.delete = htb_delete,
	.walk = htb_walk,
	.tcf_chain = htb_find_tcf,
	.bind_tcf = htb_bind_filter,
	.unbind_tcf = htb_unbind_filter,
	.dump = htb_dump_class,
	.dump_stats = htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.next = NULL,
	.cl_ops = &htb_class_ops,
	.id = "htb",
	.priv_size = sizeof(struct htb_sched),
	.enqueue = htb_enqueue,
	.dequeue = htb_dequeue,
	.requeue = htb_requeue,
	.drop = htb_drop,
	.init = htb_init,
	.reset = htb_reset,
	.destroy = htb_destroy,
	.change = NULL /* htb_change */,
	.dump = htb_dump,
	.owner = THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");