/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
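
/* For illustration (TC_HTB_MAXDEPTH is 8 in pkt_sched.h): in a hierarchy
    such as root 1:1 -> inner 1:10 -> leaf 1:100, the root 1:1 sits at
    level 7, the inner class 1:10 at level 6, and the leaf 1:100 at level 0;
    leaves are level 0 regardless of how deep in the tree they hang. */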

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
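
/* For example (assuming sysfs is mounted at /sys), the parameter above can
   be flipped at runtime:
	echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
   or set at module load time:
	modprobe sch_htb htb_hysteresis=1
 */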

/* used internally to keep status of single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	struct Qdisc_class_common common;
	/* general class parameters */
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;			/* usage count of this class */

	/* topology */
	int level;			/* our level (see above) */
	unsigned int children;
	struct htb_class *parent;	/* parent class */

	int prio;		/* these two are used only by leaves... */
	int quantum;		/* but stored for parent-to-leaf return */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When class changes from state 1->2 and disconnects from
			   parent's feed then we lose the ptr value and start from
			   the first child again. Here we store the classid of the
			   last valid ptr (used when ptr is NULL). */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;			/* node for event queue */
	psched_time_t pq_key;

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	/* token bucket parameters */
	struct qdisc_rate_table *rate;	/* rate table of the class itself */
	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
	long buffer, cbuffer;		/* token bucket depth/rate */
	psched_tdiff_t mbuffer;		/* max wait time */
	long tokens, ctokens;		/* current number of tokens */
	psched_time_t t_c;		/* checkpoint time */
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct qdisc_watchdog watchdog;

	/* non shaped skbs; let them go directly thru */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If that is still
 * unsuccessful, finish and return the direct queue.
 */
#define HTB_DIRECT ((struct htb_class *)-1)

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	   note that nfmark can be used too by attaching filter fw with no
	   rules in it */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		if ((cl = (void *)res.class) == NULL) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			if ((cl = htb_find(res.classid, sch)) == NULL)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}
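
/* A sketch of the flow above with hypothetical handles (not taken from this
 * file): given
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 * a packet with skb->priority set to 1:0 takes the direct queue, one whose
 * priority names an existing leaf (say 1:10) is enqueued there without
 * consulting any filter, and traffic the filters cannot classify falls
 * back to the default leaf 1:20.
 */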

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, long delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}

/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);

		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);

		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use so
				   reset bit in mask as parent is already ok */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent the old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing child which is pointed to
				   from parent feed - forget the pointer but
				   remember classid */
				p->un.inner.last_ptr_id[prio] = cl->common.classid;
				p->un.inner.ptr[prio] = NULL;
			}

			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline long htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}

static inline long htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but there can rather be a hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
	long toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}
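
/* Worked example of the hysteresis band (illustrative reading of the code
 * above): with htb_hysteresis enabled, a class currently in HTB_CAN_SEND
 * sees htb_hiwater() == -cl->buffer, so it stays in HTB_CAN_SEND until
 * cl->tokens + *diff falls below -cl->buffer instead of below 0; that
 * extra 0..-buffer band is what suppresses rapid mode flapping.
 */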

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine will update feed list linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->prio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with a non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int uninitialized_var(ret);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			kfree_skb(skb);
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			cl->qstats.drops++;
		}
		return ret;
	} else {
		cl->bstats.packets +=
			skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
		cl->bstats.bytes += qdisc_pkt_len(skb);
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	sch->bstats.packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
{
	long toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (long) qdisc_l2t(cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
{
	long toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (long) qdisc_l2t(cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}
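
/* Both helpers above perform one token bucket step; in pseudo-form for the
 * rate bucket (the ceil bucket is identical with ctokens/cbuffer/ceil):
 *
 *	toks = min(tokens + diff, buffer) - L2T(rate, bytes);
 *	if (toks <= -mbuffer)
 *		toks = 1 - mbuffer;
 *
 * diff is the time elapsed since the t_c checkpoint and L2T converts packet
 * length to transmission time via the rate table; the floor near -mbuffer
 * keeps a long backlog from building unbounded token debt.
 */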

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock here than in the
 * event queue. In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	long diff;

	while (cl) {
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update byte stats except for leaves which are already updated */
		if (cl->level) {
			cl->bstats.bytes += bytes;
			cl->bstats.packets += skb_is_gso(skb) ?
					skb_shinfo(skb)->gso_segs : 1;
		}
		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq).
 * Note: Applied are events that have cl->pq_key <= q->now.
 */
static psched_time_t htb_do_events(struct htb_sched *q, int level,
				   unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	   1 to simplify things when jiffy is going to be incremented
	   too soon */
	unsigned long stop_at = start + 2;
	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		long diff;
		struct rb_node *p = rb_first(&q->wait_pq[level]);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, q->wait_pq + level);
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}
	/* too much load - let's continue on the next jiffy (including the above) */
	return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ;
}

/* Returns class->node+prio from id-tree where class's id is >= id. NULL
   if no such one exists. */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find the leaf that the current feed pointers point to.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 *pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			   the original or next ptr */
			*sp->pptr =
				htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so remove this hint as it
				   can become out of date quickly */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
   you are sure that there is an active class at prio/level */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		   qdisc drops packets in enqueue routine or if someone used
		   graft operation on the leaf since last dequeue;
		   simply deactivate and skip such class */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;

		qdisc_warn_nonwc("htb", cl->un.leaf.q);
		htb_next_rb_node((level ? cl->parent->un.inner.ptr :
				  q->ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->un.leaf.deficit[level] < 0) {
			cl->un.leaf.deficit[level] += cl->quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr :
					  q->ptr[0]) + prio);
		}
		/* this used to be after charge_class but this constellation
		   gives us slightly better performance */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb = NULL;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	psched_time_t next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
		sch->flags &= ~TCQ_F_THROTTLED;
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = psched_get_time();
	start_at = jiffies;

	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		psched_time_t event;

		if (q->now >= q->near_ev_cache[level]) {
			event = htb_do_events(q, level, start_at);
			if (!event)
				event = q->now + PSCHED_TICKS_PER_SEC;
			q->near_ev_cache[level] = event;
		} else
			event = q->near_ev_cache[level];

		if (next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);
			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL)) {
				sch->q.qlen--;
				sch->flags &= ~TCQ_F_THROTTLED;
				goto fin;
			}
		}
	}
	sch->qstats.overlimits++;
	qdisc_watchdog_schedule(&q->watchdog, next_event);
fin:
	return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_INIT + 1];
	struct tc_htb_glob *gopt;
	int err;
	int i;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
	if (err < 0)
		return err;

	if (tb[TCA_HTB_INIT] == NULL) {
		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
		return -EINVAL;
	}
	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16) {
		printk(KERN_ERR
		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
		return -EINVAL;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	qdisc_watchdog_init(&q->watchdog, sch);
	skb_queue_head_init(&q->direct_queue);

	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
		q->direct_qlen = 2;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	spin_lock_bh(root_lock);

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
	nla_nest_end(skb, nest);

	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	spin_lock_bh(root_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	opt.rate = cl->rate->rate;
	opt.buffer = cl->buffer;
	opt.ceil = cl->ceil->rate;
	opt.cbuffer = cl->cbuffer;
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = cl->tokens;
	cl->xstats.ctokens = cl->ctokens;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl && !cl->level) {
		if (new == NULL &&
		    (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					     &pfifo_qdisc_ops,
					     cl->common.classid))
		    == NULL)
			return -ENOBUFS;
		sch_tree_lock(sch);
		*old = cl->un.leaf.q;
		cl->un.leaf.q = new;
		if (*old != NULL) {
			qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
			qdisc_reset(*old);
		}
		sch_tree_unlock(sch);
		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->un.leaf.q->q.qlen == 0)
		htb_deactivate(qdisc_priv(sch), cl);
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);

	parent->level = 0;
	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = psched_get_time();
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->un.leaf.q);
		qdisc_destroy(cl->un.leaf.q);
	}
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	qdisc_put_rtab(cl->rate);
	qdisc_put_rtab(cl->ceil);

	tcf_destroy_chain(&cl->filter_list);
	kfree(cl);
}

/* always called under BH & queue lock */
static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct htb_class *cl;
	unsigned int i;

	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after the htb_destroy_class call below
	   and surprisingly it worked in 2.4. But it must precede it
	   because a filter needs its target class alive to be able to call
	   unbind_filter on it (without Oops). */
	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
					  common.hnode)
			htb_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
	__skb_queue_purge(&q->direct_queue);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned int qlen;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	// TODO: why don't we allow deleting a subtree? references? does
	// the tc subsys guarantee us that in htb_destroy it holds no class
	// refs so that we can remove children safely there?
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					  &pfifo_qdisc_ops,
					  cl->parent->common.classid);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level) {
		qlen = cl->un.leaf.q->q.qlen;
		qdisc_reset(cl->un.leaf.q);
		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
	}

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
	struct nlattr *tb[TCA_HTB_RTAB + 1];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);

	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
	if (!rtab || !ctab)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr nla;
			struct gnet_estimator opt;
		} est = {
			.nla = {
				.nla_len = nla_attr_size(sizeof(est.opt)),
				.nla_type = TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval = 2,
				.ewma_log = 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle)
		    || htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			printk(KERN_ERR "htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
			goto failure;

		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE] ? : &est.nla);
		if (err) {
			kfree(cl);
			goto failure;
		}

		cl->refcnt = 1;
		cl->children = 0;
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		   so it can't be used inside of sch_tree_lock
		   -- thanks to Karlis Peisenieks */
		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					  &pfifo_qdisc_ops, classid);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			unsigned int qlen = parent->un.leaf.q->q.qlen;

			/* turn parent into inner node */
			qdisc_reset(parent->un.leaf.q);
			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->common.classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = hopt->buffer;
		cl->ctokens = hopt->cbuffer;
		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
		cl->t_c = psched_get_time();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
	} else {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err)
				return err;
		}
		sch_tree_lock(sch);
	}

	/* there used to be a nasty bug here, we have to check that the node
	   is really a leaf before changing cl->un.leaf! */
	if (!cl->level) {
		cl->quantum = rtab->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->quantum < 1000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is small. Consider r2q change.\n",
			       cl->common.classid);
			cl->quantum = 1000;
		}
		if (!hopt->quantum && cl->quantum > 200000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is big. Consider r2q change.\n",
			       cl->common.classid);
			cl->quantum = 200000;
		}
		if (hopt->quantum)
			cl->quantum = hopt->quantum;
		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->prio = TC_HTB_NUMPRIO - 1;
	}
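
	/* Example of the quantum arithmetic above (illustrative figures; r2q
	 * defaults to 10 in the tc userspace tool): a 1Mbit class
	 * (rate.rate == 125000 B/s) gets quantum 12500, inside the
	 * 1000..200000 window, while a 10Gbit class would be clamped to
	 * 200000 unless an explicit quantum is given.
	 */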

	cl->buffer = hopt->buffer;
	cl->cbuffer = hopt->cbuffer;
	if (cl->rate)
		qdisc_put_rtab(cl->rate);
	cl->rate = rtab;
	if (cl->ceil)
		qdisc_put_rtab(cl->ceil);
	cl->ceil = ctab;
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ctab)
		qdisc_put_rtab(ctab);
	return err;
}

static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

	return fl;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	   The line above used to be there to prevent attaching filters to
	   leaves. But at least the tc_index filter uses this just to get a
	   class for other reasons so we have to allow for it.
	   ----
	   19.6.2002 As Werner explained it is ok - bind filter is just
	   another way to "lock" the class - unlike "get" this lock can
	   be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.graft		= htb_graft,
	.leaf		= htb_leaf,
	.qlen_notify	= htb_qlen_notify,
	.get		= htb_get,
	.put		= htb_put,
	.change		= htb_change_class,
	.delete		= htb_delete,
	.walk		= htb_walk,
	.tcf_chain	= htb_find_tcf,
	.bind_tcf	= htb_bind_filter,
	.unbind_tcf	= htb_unbind_filter,
	.dump		= htb_dump_class,
	.dump_stats	= htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &htb_class_ops,
	.id		= "htb",
	.priv_size	= sizeof(struct htb_sched),
	.enqueue	= htb_enqueue,
	.dequeue	= htb_dequeue,
	.peek		= qdisc_peek_dequeued,
	.drop		= htb_drop,
	.init		= htb_init,
	.reset		= htb_reset,
	.destroy	= htb_destroy,
	.change		= NULL /* htb_change */,
	.dump		= htb_dump,
	.owner		= THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");