87990467 1/*
1da177e4
LT
2 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Martin Devera, <devik@cdi.cz>
10 *
11 * Credits (in time order) for older HTB versions:
12 * Stef Coene <stef.coene@docum.org>
13 * HTB support at LARTC mailing list
10297b99 14 * Ondrej Kraus, <krauso@barr.cz>
1da177e4
LT
15 * found missing INIT_QDISC(htb)
16 * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
17 * helped a lot to locate nasty class stall bug
18 * Andi Kleen, Jamal Hadi, Bert Hubert
19 * code review and helpful comments on shaping
20 * Tomasz Wrona, <tw@eter.tym.pl>
21 * created test case so that I was able to fix nasty bug
22 * Wilfried Weissmann
23 * spotted bug in dequeue code and helped with fix
24 * Jiri Fojtasek
25 * fixed requeue routine
26 * and many others. thanks.
1da177e4 27 */
1da177e4 28#include <linux/module.h>
47083fc0 29#include <linux/moduleparam.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/kernel.h>
1da177e4 32#include <linux/string.h>
1da177e4 33#include <linux/errno.h>
1da177e4
LT
34#include <linux/skbuff.h>
35#include <linux/list.h>
36#include <linux/compiler.h>
0ba48053 37#include <linux/rbtree.h>
1224736d 38#include <linux/workqueue.h>
5a0e3ad6 39#include <linux/slab.h>
dc5fc579 40#include <net/netlink.h>
292f1c7f 41#include <net/sch_generic.h>
1da177e4 42#include <net/pkt_sched.h>
1da177e4
LT
43
44/* HTB algorithm.
45 Author: devik@cdi.cz
46 ========================================================================
47 HTB is like TBF with multiple classes. It is also similar to CBQ because
10297b99 48 it allows assigning a priority to each class in the hierarchy.
1da177e4
LT
49 In fact it is another implementation of Floyd's formal sharing.
50
51 Levels:
10297b99 52 Each class is assigned a level. Leaves ALWAYS have level 0 and root
1da177e4
LT
53 classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
54 one less than their parent.
55*/
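/* Illustrative example only (device name and rates are arbitrary): a small
 * hierarchy as it would typically be built from user space with tc(8):
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 60mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 40mbit ceil 100mbit
 *
 * Here 1:10 and 1:20 are leaves (level 0); 1:1 has no parent class, so it
 * becomes an interior node at level TC_HTB_MAXDEPTH-1 once its children are
 * added. Unclassified traffic falls back to the 1:20 leaf via "default 20".
 */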
56
47083fc0 57static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
87990467 58#define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */
1da177e4
LT
59
60#if HTB_VER >> 16 != TC_HTB_PROTOVER
61#error "Mismatched sch_htb.c and pkt_sch.h"
62#endif
63
47083fc0
JDB
64/* Module parameter and sysfs export */
65module_param (htb_hysteresis, int, 0640);
66MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
67
64153ce0
ED
68static int htb_rate_est = 0; /* htb classes have a default rate estimator */
69module_param(htb_rate_est, int, 0640);
70MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
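/* Illustrative usage (assuming the module is built as sch_htb.ko): both knobs
 * are ordinary integer module parameters, so they can be given at load time
 * or, since they are exported with mode 0640, changed at run time:
 *
 *   modprobe sch_htb htb_hysteresis=1
 *   echo 1 > /sys/module/sch_htb/parameters/htb_rate_est
 */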
71
1da177e4
LT
72/* used internally to keep the status of a single class */
73enum htb_cmode {
87990467
SH
74 HTB_CANT_SEND, /* class can't send and can't borrow */
75 HTB_MAY_BORROW, /* class can't send but may borrow */
76 HTB_CAN_SEND /* class can send */
1da177e4
LT
77};
78
79/* interior & leaf nodes; props specific to leaves are marked L: */
87990467 80struct htb_class {
f4c1f3e0 81 struct Qdisc_class_common common;
87990467 82 /* general class parameters */
c1a8f1f1 83 struct gnet_stats_basic_packed bstats;
87990467 84 struct gnet_stats_queue qstats;
45203a3b 85 struct gnet_stats_rate_est64 rate_est;
87990467
SH
86 struct tc_htb_xstats xstats; /* our special stats */
87 int refcnt; /* usage count of this class */
1da177e4 88
87990467
SH
89 /* topology */
90 int level; /* our level (see above) */
42077599 91 unsigned int children;
87990467 92 struct htb_class *parent; /* parent class */
87990467 93
c19f7a34
JP
94 int prio; /* these two are used only by leaves... */
95 int quantum; /* but stored for parent-to-leaf return */
96
87990467
SH
97 union {
98 struct htb_class_leaf {
99 struct Qdisc *q;
87990467
SH
100 int deficit[TC_HTB_MAXDEPTH];
101 struct list_head drop_list;
102 } leaf;
103 struct htb_class_inner {
104 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
105 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
106 /* When class changes from state 1->2 and disconnects from
cc7ec456
ED
107 * parent's feed then we lose the ptr value and start from the
108 * first child again. Here we store classid of the
109 * last valid ptr (used when ptr is NULL).
110 */
87990467
SH
111 u32 last_ptr_id[TC_HTB_NUMPRIO];
112 } inner;
113 } un;
114 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
115 struct rb_node pq_node; /* node for event queue */
5343a7f8 116 s64 pq_key;
87990467
SH
117
118 int prio_activity; /* for which prios are we active */
119 enum htb_cmode cmode; /* current mode of the class */
120
121 /* class attached filters */
122 struct tcf_proto *filter_list;
123 int filter_cnt;
124
87990467 125 /* token bucket parameters */
292f1c7f
JP
126 struct psched_ratecfg rate;
127 struct psched_ratecfg ceil;
5343a7f8
ED
128 s64 buffer, cbuffer; /* token bucket depth/rate */
129 s64 mbuffer; /* max wait time */
130 s64 tokens, ctokens; /* current number of tokens */
131 s64 t_c; /* checkpoint time */
1da177e4
LT
132};
133
87990467 134struct htb_sched {
f4c1f3e0 135 struct Qdisc_class_hash clhash;
0cef296d 136 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
87990467
SH
137
138 /* self list - roots of self generating tree */
139 struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
140 int row_mask[TC_HTB_MAXDEPTH];
141 struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
142 u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
1da177e4 143
87990467
SH
144 /* self wait list - roots of wait PQs per row */
145 struct rb_root wait_pq[TC_HTB_MAXDEPTH];
1da177e4 146
87990467 147 /* time of nearest event per level (row) */
5343a7f8 148 s64 near_ev_cache[TC_HTB_MAXDEPTH];
1da177e4 149
87990467 150 int defcls; /* class where unclassified flows go to */
1da177e4 151
87990467
SH
152 /* filters for qdisc itself */
153 struct tcf_proto *filter_list;
1da177e4 154
5343a7f8
ED
155 int rate2quantum; /* quant = rate / rate2quantum */
156 s64 now; /* cached dequeue time */
fb983d45 157 struct qdisc_watchdog watchdog;
1da177e4 158
87990467
SH
159 /* non shaped skbs; let them go directly thru */
160 struct sk_buff_head direct_queue;
161 int direct_qlen; /* max qlen of above */
162
163 long direct_pkts;
e82181de
JP
164
165#define HTB_WARN_TOOMANYEVENTS 0x1
166 unsigned int warned; /* only one warning */
1224736d 167 struct work_struct work;
1da177e4
LT
168};
169
1da177e4 170/* find class in global hash table using given handle */
87990467 171static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
1da177e4
LT
172{
173 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 174 struct Qdisc_class_common *clc;
0cef296d 175
f4c1f3e0
PM
176 clc = qdisc_class_find(&q->clhash, handle);
177 if (clc == NULL)
1da177e4 178 return NULL;
f4c1f3e0 179 return container_of(clc, struct htb_class, common);
1da177e4
LT
180}
181
182/**
183 * htb_classify - classify a packet into class
184 *
185 * It returns NULL if the packet should be dropped or -1 if the packet
186 * should be passed directly thru. In all other cases leaf class is returned.
187 * We allow direct class selection by classid in priority. Then we examine
188 * filters in qdisc and in inner nodes (if higher filter points to the inner
189 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
10297b99 190 * internal fifo (direct). These packets then go directly thru. If we still
25985edc 191 * have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful
1da177e4
LT
192 * then finish and return direct queue.
193 */
cc7ec456 194#define HTB_DIRECT ((struct htb_class *)-1L)
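/* Illustrative walk-through (classids are arbitrary): with "... handle 1: htb
 * default 20", an skb whose skb->priority equals sch->handle (1:0) is returned
 * as HTB_DIRECT; priority 1:10 selects leaf 1:10 without running any filter;
 * otherwise the qdisc-level and then inner-class filter chains are consulted,
 * and if nothing matches, the 1:20 default leaf (or HTB_DIRECT if that class
 * does not exist) is used.
 */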
1da177e4 195
87990467
SH
196static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
197 int *qerr)
1da177e4
LT
198{
199 struct htb_sched *q = qdisc_priv(sch);
200 struct htb_class *cl;
201 struct tcf_result res;
202 struct tcf_proto *tcf;
203 int result;
204
205 /* allow to select class by setting skb->priority to valid classid;
cc7ec456
ED
206 * note that nfmark can be used too by attaching filter fw with no
207 * rules in it
208 */
1da177e4 209 if (skb->priority == sch->handle)
87990467 210 return HTB_DIRECT; /* X:0 (direct flow) selected */
cc7ec456
ED
211 cl = htb_find(skb->priority, sch);
212 if (cl && cl->level == 0)
1da177e4
LT
213 return cl;
214
c27f339a 215 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
1da177e4
LT
216 tcf = q->filter_list;
217 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
218#ifdef CONFIG_NET_CLS_ACT
219 switch (result) {
220 case TC_ACT_QUEUED:
87990467 221 case TC_ACT_STOLEN:
378a2f09 222 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
1da177e4
LT
223 case TC_ACT_SHOT:
224 return NULL;
225 }
1da177e4 226#endif
cc7ec456
ED
227 cl = (void *)res.class;
228 if (!cl) {
1da177e4 229 if (res.classid == sch->handle)
87990467 230 return HTB_DIRECT; /* X:0 (direct flow) */
cc7ec456
ED
231 cl = htb_find(res.classid, sch);
232 if (!cl)
87990467 233 break; /* filter selected invalid classid */
1da177e4
LT
234 }
235 if (!cl->level)
87990467 236 return cl; /* we hit leaf; return it */
1da177e4
LT
237
238 /* we have got inner class; apply inner filter chain */
239 tcf = cl->filter_list;
240 }
241 /* classification failed; try to use default class */
87990467 242 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
1da177e4 243 if (!cl || cl->level)
87990467 244 return HTB_DIRECT; /* bad default .. this is safe bet */
1da177e4
LT
245 return cl;
246}
247
1da177e4
LT
248/**
249 * htb_add_to_id_tree - adds class to the round robin list
250 *
251 * Routine adds class to the list (actually tree) sorted by classid.
252 * Make sure that class is not already on such list for given prio.
253 */
87990467
SH
254static void htb_add_to_id_tree(struct rb_root *root,
255 struct htb_class *cl, int prio)
1da177e4
LT
256{
257 struct rb_node **p = &root->rb_node, *parent = NULL;
3bf72957 258
1da177e4 259 while (*p) {
87990467
SH
260 struct htb_class *c;
261 parent = *p;
1da177e4 262 c = rb_entry(parent, struct htb_class, node[prio]);
3bf72957 263
f4c1f3e0 264 if (cl->common.classid > c->common.classid)
1da177e4 265 p = &parent->rb_right;
87990467 266 else
1da177e4
LT
267 p = &parent->rb_left;
268 }
269 rb_link_node(&cl->node[prio], parent, p);
270 rb_insert_color(&cl->node[prio], root);
271}
272
273/**
274 * htb_add_to_wait_tree - adds class to the event queue with delay
275 *
276 * The class is added to priority event queue to indicate that class will
277 * change its mode at time cl->pq_key. Make sure that class is not
278 * already in the queue.
279 */
87990467 280static void htb_add_to_wait_tree(struct htb_sched *q,
56b765b7 281 struct htb_class *cl, s64 delay)
1da177e4
LT
282{
283 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
3bf72957 284
fb983d45
PM
285 cl->pq_key = q->now + delay;
286 if (cl->pq_key == q->now)
1da177e4
LT
287 cl->pq_key++;
288
289 /* update the nearest event cache */
fb983d45 290 if (q->near_ev_cache[cl->level] > cl->pq_key)
1da177e4 291 q->near_ev_cache[cl->level] = cl->pq_key;
87990467 292
1da177e4 293 while (*p) {
87990467
SH
294 struct htb_class *c;
295 parent = *p;
1da177e4 296 c = rb_entry(parent, struct htb_class, pq_node);
fb983d45 297 if (cl->pq_key >= c->pq_key)
1da177e4 298 p = &parent->rb_right;
87990467 299 else
1da177e4
LT
300 p = &parent->rb_left;
301 }
302 rb_link_node(&cl->pq_node, parent, p);
303 rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
304}
305
306/**
307 * htb_next_rb_node - finds next node in binary tree
308 *
309 * When we are past last key we return NULL.
310 * Average complexity is 2 steps per call.
311 */
3696f625 312static inline void htb_next_rb_node(struct rb_node **n)
1da177e4
LT
313{
314 *n = rb_next(*n);
315}
316
317/**
318 * htb_add_class_to_row - add class to its row
319 *
320 * The class is added to row at priorities marked in mask.
321 * It does nothing if mask == 0.
322 */
87990467
SH
323static inline void htb_add_class_to_row(struct htb_sched *q,
324 struct htb_class *cl, int mask)
1da177e4 325{
1da177e4
LT
326 q->row_mask[cl->level] |= mask;
327 while (mask) {
328 int prio = ffz(~mask);
329 mask &= ~(1 << prio);
87990467 330 htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
1da177e4
LT
331 }
332}
333
3696f625
SH
334/* If this triggers, it is a bug in this code, but it need not be fatal */
335static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
336{
81771b3b 337 if (RB_EMPTY_NODE(rb)) {
3696f625
SH
338 WARN_ON(1);
339 } else {
340 rb_erase(rb, root);
341 RB_CLEAR_NODE(rb);
342 }
343}
344
345
1da177e4
LT
346/**
347 * htb_remove_class_from_row - removes class from its row
348 *
349 * The class is removed from row at priorities marked in mask.
350 * It does nothing if mask == 0.
351 */
87990467
SH
352static inline void htb_remove_class_from_row(struct htb_sched *q,
353 struct htb_class *cl, int mask)
1da177e4
LT
354{
355 int m = 0;
3bf72957 356
1da177e4
LT
357 while (mask) {
358 int prio = ffz(~mask);
3696f625 359
1da177e4 360 mask &= ~(1 << prio);
87990467
SH
361 if (q->ptr[cl->level][prio] == cl->node + prio)
362 htb_next_rb_node(q->ptr[cl->level] + prio);
3696f625
SH
363
364 htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
87990467 365 if (!q->row[cl->level][prio].rb_node)
1da177e4
LT
366 m |= 1 << prio;
367 }
1da177e4
LT
368 q->row_mask[cl->level] &= ~m;
369}
370
371/**
372 * htb_activate_prios - creates active classe's feed chain
373 *
374 * The class is connected to ancestors and/or appropriate rows
10297b99 375 * for priorities it is participating in. cl->cmode must be the new
1da177e4
LT
376 * (activated) mode. It does nothing if cl->prio_activity == 0.
377 */
87990467 378static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
1da177e4
LT
379{
380 struct htb_class *p = cl->parent;
87990467 381 long m, mask = cl->prio_activity;
1da177e4
LT
382
383 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
87990467
SH
384 m = mask;
385 while (m) {
1da177e4
LT
386 int prio = ffz(~m);
387 m &= ~(1 << prio);
87990467 388
1da177e4
LT
389 if (p->un.inner.feed[prio].rb_node)
390 /* parent already has its feed in use so that
cc7ec456
ED
391 * reset bit in mask as parent is already ok
392 */
1da177e4 393 mask &= ~(1 << prio);
87990467
SH
394
395 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
1da177e4 396 }
1da177e4 397 p->prio_activity |= mask;
87990467
SH
398 cl = p;
399 p = cl->parent;
3bf72957 400
1da177e4
LT
401 }
402 if (cl->cmode == HTB_CAN_SEND && mask)
87990467 403 htb_add_class_to_row(q, cl, mask);
1da177e4
LT
404}
405
406/**
407 * htb_deactivate_prios - remove class from feed chain
408 *
10297b99 409 * cl->cmode must represent old mode (before deactivation). It does
1da177e4
LT
410 * nothing if cl->prio_activity == 0. Class is removed from all feed
411 * chains and rows.
412 */
413static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
414{
415 struct htb_class *p = cl->parent;
87990467 416 long m, mask = cl->prio_activity;
1da177e4
LT
417
418 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
87990467
SH
419 m = mask;
420 mask = 0;
1da177e4
LT
421 while (m) {
422 int prio = ffz(~m);
423 m &= ~(1 << prio);
87990467
SH
424
425 if (p->un.inner.ptr[prio] == cl->node + prio) {
1da177e4 426 /* we are removing child which is pointed to from
cc7ec456
ED
427 * parent feed - forget the pointer but remember
428 * classid
429 */
f4c1f3e0 430 p->un.inner.last_ptr_id[prio] = cl->common.classid;
1da177e4
LT
431 p->un.inner.ptr[prio] = NULL;
432 }
87990467 433
3696f625 434 htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
87990467
SH
435
436 if (!p->un.inner.feed[prio].rb_node)
1da177e4
LT
437 mask |= 1 << prio;
438 }
3bf72957 439
1da177e4 440 p->prio_activity &= ~mask;
87990467
SH
441 cl = p;
442 p = cl->parent;
3bf72957 443
1da177e4 444 }
87990467
SH
445 if (cl->cmode == HTB_CAN_SEND && mask)
446 htb_remove_class_from_row(q, cl, mask);
1da177e4
LT
447}
448
56b765b7 449static inline s64 htb_lowater(const struct htb_class *cl)
18a63e86 450{
47083fc0
JDB
451 if (htb_hysteresis)
452 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
453 else
454 return 0;
18a63e86 455}
56b765b7 456static inline s64 htb_hiwater(const struct htb_class *cl)
18a63e86 457{
47083fc0
JDB
458 if (htb_hysteresis)
459 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
460 else
461 return 0;
18a63e86 462}
47083fc0 463
18a63e86 464
1da177e4
LT
465/**
466 * htb_class_mode - computes and returns current class mode
467 *
468 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
469 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
10297b99 470 * from now to time when cl will change its state.
1da177e4 471 * Also it is worth noting that the class mode doesn't change simply
10297b99 472 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
1da177e4
LT
473 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
474 * mode transitions per time unit. The speed gain is about 1/6.
475 */
87990467 476static inline enum htb_cmode
56b765b7 477htb_class_mode(struct htb_class *cl, s64 *diff)
1da177e4 478{
56b765b7 479 s64 toks;
1da177e4 480
87990467
SH
481 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
482 *diff = -toks;
483 return HTB_CANT_SEND;
484 }
18a63e86 485
87990467
SH
486 if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
487 return HTB_CAN_SEND;
1da177e4 488
87990467
SH
489 *diff = -toks;
490 return HTB_MAY_BORROW;
1da177e4
LT
491}
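/* Worked example (illustrative numbers, htb_hysteresis=0 so both water marks
 * are 0): if cl->ctokens + *diff == -5000 the ceil bucket is empty, so the
 * class is HTB_CANT_SEND and *diff becomes 5000 (how long it must still
 * wait); if the ceil bucket is non-negative but cl->tokens + *diff == -2000,
 * the class is HTB_MAY_BORROW and *diff becomes 2000; only when both buckets
 * are non-negative is HTB_CAN_SEND returned.
 */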
492
493/**
494 * htb_change_class_mode - changes class's mode
495 *
496 * This should be the only way to change a class's mode under normal
497 * circumstances. The routine will update feed list linkage, change the mode
498 * and add class to the wait event queue if appropriate. New mode should
499 * be different from old one and cl->pq_key has to be valid if changing
500 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
501 */
87990467 502static void
56b765b7 503htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
87990467
SH
504{
505 enum htb_cmode new_mode = htb_class_mode(cl, diff);
1da177e4
LT
506
507 if (new_mode == cl->cmode)
87990467
SH
508 return;
509
510 if (cl->prio_activity) { /* not necessary: speed optimization */
511 if (cl->cmode != HTB_CANT_SEND)
512 htb_deactivate_prios(q, cl);
1da177e4 513 cl->cmode = new_mode;
87990467
SH
514 if (new_mode != HTB_CANT_SEND)
515 htb_activate_prios(q, cl);
516 } else
1da177e4
LT
517 cl->cmode = new_mode;
518}
519
520/**
10297b99 521 * htb_activate - inserts leaf cl into appropriate active feeds
1da177e4
LT
522 *
523 * The routine learns the (new) priority of the leaf and activates the feed
524 * chain for that prio. It can be called safely on an already active leaf.
525 * It also adds the leaf to the droplist.
526 */
87990467 527static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
1da177e4 528{
547b792c 529 WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
3bf72957 530
1da177e4 531 if (!cl->prio_activity) {
c19f7a34 532 cl->prio_activity = 1 << cl->prio;
87990467
SH
533 htb_activate_prios(q, cl);
534 list_add_tail(&cl->un.leaf.drop_list,
c19f7a34 535 q->drops + cl->prio);
1da177e4
LT
536 }
537}
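/* Illustrative example: a leaf configured with prio 2 sets prio_activity to
 * 1 << 2 when its first packet is queued; htb_activate_prios() then links it
 * into its parent's feed[2] tree, walks up while the current class may only
 * borrow, and finally adds the first class that can send in its own right to
 * the row[level][2] self-feed tree; the leaf itself is also appended to
 * q->drops[2] for htb_drop().
 */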
538
539/**
10297b99 540 * htb_deactivate - remove leaf cl from active feeds
1da177e4
LT
541 *
542 * Make sure that the leaf is active. In other words it can't be called
543 * with non-active leaf. It also removes class from the drop list.
544 */
87990467 545static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
1da177e4 546{
547b792c 547 WARN_ON(!cl->prio_activity);
3bf72957 548
87990467 549 htb_deactivate_prios(q, cl);
1da177e4
LT
550 cl->prio_activity = 0;
551 list_del_init(&cl->un.leaf.drop_list);
552}
553
554static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
555{
f30ab418 556 int uninitialized_var(ret);
87990467
SH
557 struct htb_sched *q = qdisc_priv(sch);
558 struct htb_class *cl = htb_classify(skb, sch, &ret);
559
560 if (cl == HTB_DIRECT) {
561 /* enqueue to helper queue */
562 if (q->direct_queue.qlen < q->direct_qlen) {
563 __skb_queue_tail(&q->direct_queue, skb);
564 q->direct_pkts++;
565 } else {
17045755 566 return qdisc_drop(skb, sch);
87990467 567 }
1da177e4 568#ifdef CONFIG_NET_CLS_ACT
87990467 569 } else if (!cl) {
c27f339a 570 if (ret & __NET_XMIT_BYPASS)
87990467
SH
571 sch->qstats.drops++;
572 kfree_skb(skb);
573 return ret;
1da177e4 574#endif
378a2f09
JP
575 } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
576 if (net_xmit_drop_count(ret)) {
577 sch->qstats.drops++;
578 cl->qstats.drops++;
579 }
69747650 580 return ret;
87990467 581 } else {
87990467
SH
582 htb_activate(q, cl);
583 }
584
585 sch->q.qlen++;
87990467 586 return NET_XMIT_SUCCESS;
1da177e4
LT
587}
588
56b765b7 589static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
59e4220a 590{
56b765b7 591 s64 toks = diff + cl->tokens;
59e4220a
JP
592
593 if (toks > cl->buffer)
594 toks = cl->buffer;
292f1c7f 595 toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
59e4220a
JP
596 if (toks <= -cl->mbuffer)
597 toks = 1 - cl->mbuffer;
598
599 cl->tokens = toks;
600}
601
56b765b7 602static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
59e4220a 603{
56b765b7 604 s64 toks = diff + cl->ctokens;
59e4220a
JP
605
606 if (toks > cl->cbuffer)
607 toks = cl->cbuffer;
292f1c7f 608 toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
59e4220a
JP
609 if (toks <= -cl->mbuffer)
610 toks = 1 - cl->mbuffer;
611
612 cl->ctokens = toks;
613}
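/* Illustrative arithmetic for the two helpers above (values in nanoseconds):
 * with cl->tokens == 3000, diff == 4000 since the last checkpoint,
 * cl->buffer == 10000 and a packet costing 6000 at the class rate
 * (psched_l2t_ns()), the new token count is min(3000 + 4000, 10000) - 6000 =
 * 1000; the result is additionally floored at 1 - cl->mbuffer so that a
 * large packet cannot push the bucket arbitrarily far negative.
 */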
614
1da177e4
LT
615/**
616 * htb_charge_class - charges amount "bytes" to leaf and ancestors
617 *
618 * Routine assumes that packet "bytes" long was dequeued from leaf cl
619 * borrowing from "level". It accounts bytes to ceil leaky bucket for
620 * leaf and all ancestors and to rate bucket for ancestors at levels
621 * "level" and higher. It also handles possible change of mode resulting
622 * from the update. Note that mode can also increase here (MAY_BORROW to
623 * CAN_SEND) because we can use a more precise clock than the event queue here.
624 * In such case we remove class from event queue first.
625 */
87990467 626static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
c9726d68 627 int level, struct sk_buff *skb)
87990467 628{
0abf77e5 629 int bytes = qdisc_pkt_len(skb);
1da177e4 630 enum htb_cmode old_mode;
56b765b7 631 s64 diff;
1da177e4
LT
632
633 while (cl) {
56b765b7 634 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
1da177e4 635 if (cl->level >= level) {
87990467
SH
636 if (cl->level == level)
637 cl->xstats.lends++;
59e4220a 638 htb_accnt_tokens(cl, bytes, diff);
1da177e4
LT
639 } else {
640 cl->xstats.borrows++;
87990467 641 cl->tokens += diff; /* we moved t_c; update tokens */
1da177e4 642 }
59e4220a 643 htb_accnt_ctokens(cl, bytes, diff);
1da177e4 644 cl->t_c = q->now;
1da177e4 645
87990467
SH
646 old_mode = cl->cmode;
647 diff = 0;
648 htb_change_class_mode(q, cl, &diff);
1da177e4
LT
649 if (old_mode != cl->cmode) {
650 if (old_mode != HTB_CAN_SEND)
3696f625 651 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1da177e4 652 if (cl->cmode != HTB_CAN_SEND)
87990467 653 htb_add_to_wait_tree(q, cl, diff);
1da177e4 654 }
1da177e4 655
bfe0d029
ED
656 /* update basic stats except for leaves which are already updated */
657 if (cl->level)
658 bstats_update(&cl->bstats, skb);
659
1da177e4
LT
660 cl = cl->parent;
661 }
662}
663
664/**
665 * htb_do_events - make mode changes to classes at the level
666 *
fb983d45 667 * Scans event queue for pending events and applies them. Returns time of
1224736d 668 * next pending event (0 for no event in pq, q->now for too many events).
fb983d45 669 * Note: Only events with cl->pq_key <= q->now are applied.
1da177e4 670 */
5343a7f8
ED
671static s64 htb_do_events(struct htb_sched *q, int level,
672 unsigned long start)
1da177e4 673{
8f3ea33a 674 /* don't run for longer than 2 jiffies; 2 is used instead of
cc7ec456
ED
675 * 1 to simplify things when jiffy is going to be incremented
676 * too soon
677 */
a73be040 678 unsigned long stop_at = start + 2;
8f3ea33a 679 while (time_before(jiffies, stop_at)) {
1da177e4 680 struct htb_class *cl;
56b765b7 681 s64 diff;
30bdbe39
AM
682 struct rb_node *p = rb_first(&q->wait_pq[level]);
683
87990467
SH
684 if (!p)
685 return 0;
1da177e4
LT
686
687 cl = rb_entry(p, struct htb_class, pq_node);
fb983d45
PM
688 if (cl->pq_key > q->now)
689 return cl->pq_key;
690
3696f625 691 htb_safe_rb_erase(p, q->wait_pq + level);
56b765b7 692 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
87990467 693 htb_change_class_mode(q, cl, &diff);
1da177e4 694 if (cl->cmode != HTB_CAN_SEND)
87990467 695 htb_add_to_wait_tree(q, cl, diff);
1da177e4 696 }
1224736d
JP
697
698 /* too much load - let's continue after a break for scheduling */
e82181de 699 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
cc7ec456 700 pr_warning("htb: too many events!\n");
e82181de
JP
701 q->warned |= HTB_WARN_TOOMANYEVENTS;
702 }
1224736d
JP
703
704 return q->now;
1da177e4
LT
705}
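/* Illustrative timing: if the nearest wait_pq entry is 2ms in the future,
 * htb_do_events() returns that pq_key and htb_dequeue() arms the watchdog
 * hrtimer for it; if servicing events takes more than 2 jiffies, q->now is
 * returned instead and htb_dequeue() defers the remaining work to
 * htb_work_func() via the workqueue.
 */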
706
707/* Returns class->node+prio from id-tree where the class's id is >= id. NULL
cc7ec456
ED
708 * if no such one exists.
709 */
87990467
SH
710static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
711 u32 id)
1da177e4
LT
712{
713 struct rb_node *r = NULL;
714 while (n) {
87990467
SH
715 struct htb_class *cl =
716 rb_entry(n, struct htb_class, node[prio]);
87990467 717
f4c1f3e0 718 if (id > cl->common.classid) {
1da177e4 719 n = n->rb_right;
1b5c0077 720 } else if (id < cl->common.classid) {
1da177e4
LT
721 r = n;
722 n = n->rb_left;
1b5c0077
JP
723 } else {
724 return n;
1da177e4
LT
725 }
726 }
727 return r;
728}
729
730/**
731 * htb_lookup_leaf - returns next leaf class in DRR order
732 *
733 * Find the leaf where the current feed pointer points.
734 */
87990467
SH
735static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
736 struct rb_node **pptr, u32 * pid)
1da177e4
LT
737{
738 int i;
739 struct {
740 struct rb_node *root;
741 struct rb_node **pptr;
742 u32 *pid;
87990467
SH
743 } stk[TC_HTB_MAXDEPTH], *sp = stk;
744
512bb43e 745 BUG_ON(!tree->rb_node);
1da177e4
LT
746 sp->root = tree->rb_node;
747 sp->pptr = pptr;
748 sp->pid = pid;
749
750 for (i = 0; i < 65535; i++) {
87990467 751 if (!*sp->pptr && *sp->pid) {
10297b99 752 /* ptr was invalidated but id is valid - try to recover
cc7ec456
ED
753 * the original or next ptr
754 */
87990467
SH
755 *sp->pptr =
756 htb_id_find_next_upper(prio, sp->root, *sp->pid);
1da177e4 757 }
87990467 758 *sp->pid = 0; /* ptr is valid now so remove this hint as it
cc7ec456
ED
759 * can become out of date quickly
760 */
87990467 761 if (!*sp->pptr) { /* we are at right end; rewind & go up */
1da177e4 762 *sp->pptr = sp->root;
87990467 763 while ((*sp->pptr)->rb_left)
1da177e4
LT
764 *sp->pptr = (*sp->pptr)->rb_left;
765 if (sp > stk) {
766 sp--;
512bb43e
JP
767 if (!*sp->pptr) {
768 WARN_ON(1);
87990467 769 return NULL;
512bb43e 770 }
87990467 771 htb_next_rb_node(sp->pptr);
1da177e4
LT
772 }
773 } else {
774 struct htb_class *cl;
87990467
SH
775 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
776 if (!cl->level)
1da177e4
LT
777 return cl;
778 (++sp)->root = cl->un.inner.feed[prio].rb_node;
87990467
SH
779 sp->pptr = cl->un.inner.ptr + prio;
780 sp->pid = cl->un.inner.last_ptr_id + prio;
1da177e4
LT
781 }
782 }
547b792c 783 WARN_ON(1);
1da177e4
LT
784 return NULL;
785}
786
787/* dequeues packet at given priority and level; call only if
cc7ec456
ED
788 * you are sure that there is active class at prio/level
789 */
87990467
SH
790static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
791 int level)
1da177e4
LT
792{
793 struct sk_buff *skb = NULL;
87990467 794 struct htb_class *cl, *start;
1da177e4 795 /* look initial class up in the row */
87990467
SH
796 start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
797 q->ptr[level] + prio,
798 q->last_ptr_id[level] + prio);
799
1da177e4
LT
800 do {
801next:
512bb43e 802 if (unlikely(!cl))
87990467 803 return NULL;
1da177e4
LT
804
805 /* class can be empty - it is unlikely but can be true if leaf
cc7ec456
ED
806 * qdisc drops packets in enqueue routine or if someone used
807 * graft operation on the leaf since last dequeue;
808 * simply deactivate and skip such class
809 */
1da177e4
LT
810 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
811 struct htb_class *next;
87990467 812 htb_deactivate(q, cl);
1da177e4
LT
813
814 /* row/level might become empty */
815 if ((q->row_mask[level] & (1 << prio)) == 0)
87990467 816 return NULL;
1da177e4 817
87990467
SH
818 next = htb_lookup_leaf(q->row[level] + prio,
819 prio, q->ptr[level] + prio,
820 q->last_ptr_id[level] + prio);
821
822 if (cl == start) /* fix start if we just deleted it */
1da177e4
LT
823 start = next;
824 cl = next;
825 goto next;
826 }
87990467
SH
827
828 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
829 if (likely(skb != NULL))
1da177e4 830 break;
633fe66e 831
b00355db 832 qdisc_warn_nonwc("htb", cl->un.leaf.q);
87990467
SH
833 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
834 ptr[0]) + prio);
835 cl = htb_lookup_leaf(q->row[level] + prio, prio,
836 q->ptr[level] + prio,
837 q->last_ptr_id[level] + prio);
1da177e4
LT
838
839 } while (cl != start);
840
841 if (likely(skb != NULL)) {
196d97f6 842 bstats_update(&cl->bstats, skb);
0abf77e5
JK
843 cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
844 if (cl->un.leaf.deficit[level] < 0) {
c19f7a34 845 cl->un.leaf.deficit[level] += cl->quantum;
87990467
SH
846 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
847 ptr[0]) + prio);
1da177e4
LT
848 }
849 /* this used to be after charge_class but this constellation
cc7ec456
ED
850 * gives us slightly better performance
851 */
1da177e4 852 if (!cl->un.leaf.q->q.qlen)
87990467 853 htb_deactivate(q, cl);
c9726d68 854 htb_charge_class(q, cl, level, skb);
1da177e4
LT
855 }
856 return skb;
857}
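/* Illustrative DRR round (arbitrary numbers): with cl->quantum == 3000 and a
 * starting deficit of 0, dequeuing a 1500 byte packet drives the deficit to
 * -1500, so one quantum is added (deficit 1500) and the round-robin pointer
 * advances to the next class; when this class is visited again, a further
 * 1500 byte packet leaves the deficit at exactly 0 and the pointer stays on
 * the class for one more packet.
 */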
858
1da177e4
LT
859static struct sk_buff *htb_dequeue(struct Qdisc *sch)
860{
9190b3b3 861 struct sk_buff *skb;
1da177e4
LT
862 struct htb_sched *q = qdisc_priv(sch);
863 int level;
5343a7f8 864 s64 next_event;
a73be040 865 unsigned long start_at;
1da177e4
LT
866
867 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
87990467
SH
868 skb = __skb_dequeue(&q->direct_queue);
869 if (skb != NULL) {
9190b3b3
ED
870ok:
871 qdisc_bstats_update(sch, skb);
fd245a4a 872 qdisc_unthrottled(sch);
1da177e4
LT
873 sch->q.qlen--;
874 return skb;
875 }
876
87990467
SH
877 if (!sch->q.qlen)
878 goto fin;
56b765b7 879 q->now = ktime_to_ns(ktime_get());
a73be040 880 start_at = jiffies;
1da177e4 881
d2fe85da 882 next_event = q->now + 5LLU * NSEC_PER_SEC;
633fe66e 883
1da177e4
LT
884 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
885 /* common case optimization - skip event handler quickly */
886 int m;
5343a7f8 887 s64 event;
fb983d45
PM
888
889 if (q->now >= q->near_ev_cache[level]) {
a73be040 890 event = htb_do_events(q, level, start_at);
2e4b3b0e 891 if (!event)
56b765b7 892 event = q->now + NSEC_PER_SEC;
2e4b3b0e 893 q->near_ev_cache[level] = event;
1da177e4 894 } else
fb983d45
PM
895 event = q->near_ev_cache[level];
896
c0851347 897 if (next_event > event)
fb983d45 898 next_event = event;
87990467 899
1da177e4
LT
900 m = ~q->row_mask[level];
901 while (m != (int)(-1)) {
87990467 902 int prio = ffz(m);
cc7ec456 903
1da177e4 904 m |= 1 << prio;
87990467 905 skb = htb_dequeue_tree(q, prio, level);
9190b3b3
ED
906 if (likely(skb != NULL))
907 goto ok;
1da177e4
LT
908 }
909 }
fb983d45 910 sch->qstats.overlimits++;
56b765b7
V
911 if (likely(next_event > q->now)) {
912 if (!test_bit(__QDISC_STATE_DEACTIVATED,
913 &qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
914 ktime_t time = ns_to_ktime(next_event);
915 qdisc_throttled(q->watchdog.qdisc);
916 hrtimer_start(&q->watchdog.timer, time,
917 HRTIMER_MODE_ABS);
918 }
919 } else {
1224736d 920 schedule_work(&q->work);
56b765b7 921 }
1da177e4 922fin:
1da177e4
LT
923 return skb;
924}
925
926/* try to drop from each class (by prio) until one succeed */
87990467 927static unsigned int htb_drop(struct Qdisc *sch)
1da177e4
LT
928{
929 struct htb_sched *q = qdisc_priv(sch);
930 int prio;
931
932 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
933 struct list_head *p;
87990467 934 list_for_each(p, q->drops + prio) {
1da177e4
LT
935 struct htb_class *cl = list_entry(p, struct htb_class,
936 un.leaf.drop_list);
937 unsigned int len;
87990467
SH
938 if (cl->un.leaf.q->ops->drop &&
939 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
1da177e4
LT
940 sch->q.qlen--;
941 if (!cl->un.leaf.q->q.qlen)
87990467 942 htb_deactivate(q, cl);
1da177e4
LT
943 return len;
944 }
945 }
946 }
947 return 0;
948}
949
950/* reset all classes */
951/* always called under BH & queue lock */
87990467 952static void htb_reset(struct Qdisc *sch)
1da177e4
LT
953{
954 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 955 struct htb_class *cl;
f4c1f3e0 956 unsigned int i;
0cef296d 957
f4c1f3e0 958 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 959 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1da177e4 960 if (cl->level)
87990467 961 memset(&cl->un.inner, 0, sizeof(cl->un.inner));
1da177e4 962 else {
87990467 963 if (cl->un.leaf.q)
1da177e4
LT
964 qdisc_reset(cl->un.leaf.q);
965 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
966 }
967 cl->prio_activity = 0;
968 cl->cmode = HTB_CAN_SEND;
1da177e4
LT
969
970 }
971 }
fb983d45 972 qdisc_watchdog_cancel(&q->watchdog);
1da177e4
LT
973 __skb_queue_purge(&q->direct_queue);
974 sch->q.qlen = 0;
87990467
SH
975 memset(q->row, 0, sizeof(q->row));
976 memset(q->row_mask, 0, sizeof(q->row_mask));
977 memset(q->wait_pq, 0, sizeof(q->wait_pq));
978 memset(q->ptr, 0, sizeof(q->ptr));
1da177e4 979 for (i = 0; i < TC_HTB_NUMPRIO; i++)
87990467 980 INIT_LIST_HEAD(q->drops + i);
1da177e4
LT
981}
982
27a3421e
PM
983static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
984 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
985 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
986 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
987 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
6906f4ed 988 [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
27a3421e
PM
989};
990
1224736d
JP
991static void htb_work_func(struct work_struct *work)
992{
993 struct htb_sched *q = container_of(work, struct htb_sched, work);
994 struct Qdisc *sch = q->watchdog.qdisc;
995
996 __netif_schedule(qdisc_root(sch));
997}
998
1e90474c 999static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1da177e4
LT
1000{
1001 struct htb_sched *q = qdisc_priv(sch);
6906f4ed 1002 struct nlattr *tb[TCA_HTB_MAX + 1];
1da177e4 1003 struct tc_htb_glob *gopt;
cee63723 1004 int err;
1da177e4 1005 int i;
cee63723
PM
1006
1007 if (!opt)
1008 return -EINVAL;
1009
6906f4ed 1010 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
cee63723
PM
1011 if (err < 0)
1012 return err;
1013
6906f4ed 1014 if (!tb[TCA_HTB_INIT])
1da177e4 1015 return -EINVAL;
6906f4ed 1016
1e90474c 1017 gopt = nla_data(tb[TCA_HTB_INIT]);
6906f4ed 1018 if (gopt->version != HTB_VER >> 16)
1da177e4 1019 return -EINVAL;
1da177e4 1020
f4c1f3e0
PM
1021 err = qdisc_class_hash_init(&q->clhash);
1022 if (err < 0)
1023 return err;
1da177e4 1024 for (i = 0; i < TC_HTB_NUMPRIO; i++)
87990467 1025 INIT_LIST_HEAD(q->drops + i);
1da177e4 1026
fb983d45 1027 qdisc_watchdog_init(&q->watchdog, sch);
1224736d 1028 INIT_WORK(&q->work, htb_work_func);
1da177e4
LT
1029 skb_queue_head_init(&q->direct_queue);
1030
6906f4ed
ED
1031 if (tb[TCA_HTB_DIRECT_QLEN])
1032 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1033 else {
1034 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1035 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1036 q->direct_qlen = 2;
1037 }
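/* Illustrative configuration (assuming a recent enough iproute2): the length
 * of the direct queue can be set explicitly when the qdisc is created, e.g.
 * "tc qdisc add dev eth0 root handle 1: htb default 20 direct_qlen 100";
 * otherwise it falls back to the device's tx_queue_len as above.
 */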
1da177e4
LT
1038 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1039 q->rate2quantum = 1;
1040 q->defcls = gopt->defcls;
1041
1042 return 0;
1043}
1044
1045static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1046{
102396ae 1047 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1da177e4 1048 struct htb_sched *q = qdisc_priv(sch);
4b3550ef 1049 struct nlattr *nest;
1da177e4 1050 struct tc_htb_glob gopt;
4b3550ef 1051
7698b4fc 1052 spin_lock_bh(root_lock);
1da177e4 1053
4b3550ef 1054 gopt.direct_pkts = q->direct_pkts;
1da177e4
LT
1055 gopt.version = HTB_VER;
1056 gopt.rate2quantum = q->rate2quantum;
1057 gopt.defcls = q->defcls;
3bf72957 1058 gopt.debug = 0;
4b3550ef
PM
1059
1060 nest = nla_nest_start(skb, TCA_OPTIONS);
1061 if (nest == NULL)
1062 goto nla_put_failure;
6906f4ed
ED
1063 if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1064 nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1b34ec43 1065 goto nla_put_failure;
4b3550ef
PM
1066 nla_nest_end(skb, nest);
1067
7698b4fc 1068 spin_unlock_bh(root_lock);
1da177e4 1069 return skb->len;
4b3550ef 1070
1e90474c 1071nla_put_failure:
7698b4fc 1072 spin_unlock_bh(root_lock);
4b3550ef 1073 nla_nest_cancel(skb, nest);
1da177e4
LT
1074 return -1;
1075}
1076
1077static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
87990467 1078 struct sk_buff *skb, struct tcmsg *tcm)
1da177e4 1079{
87990467 1080 struct htb_class *cl = (struct htb_class *)arg;
102396ae 1081 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
4b3550ef 1082 struct nlattr *nest;
1da177e4
LT
1083 struct tc_htb_opt opt;
1084
7698b4fc 1085 spin_lock_bh(root_lock);
f4c1f3e0
PM
1086 tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1087 tcm->tcm_handle = cl->common.classid;
1da177e4
LT
1088 if (!cl->level && cl->un.leaf.q)
1089 tcm->tcm_info = cl->un.leaf.q->handle;
1090
4b3550ef
PM
1091 nest = nla_nest_start(skb, TCA_OPTIONS);
1092 if (nest == NULL)
1093 goto nla_put_failure;
1da177e4 1094
87990467 1095 memset(&opt, 0, sizeof(opt));
1da177e4 1096
01cb71d2 1097 psched_ratecfg_getrate(&opt.rate, &cl->rate);
9c10f411 1098 opt.buffer = PSCHED_NS2TICKS(cl->buffer);
01cb71d2 1099 psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
9c10f411 1100 opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
c19f7a34
JP
1101 opt.quantum = cl->quantum;
1102 opt.prio = cl->prio;
87990467 1103 opt.level = cl->level;
1b34ec43
DM
1104 if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1105 goto nla_put_failure;
4b3550ef
PM
1106
1107 nla_nest_end(skb, nest);
7698b4fc 1108 spin_unlock_bh(root_lock);
1da177e4 1109 return skb->len;
4b3550ef 1110
1e90474c 1111nla_put_failure:
7698b4fc 1112 spin_unlock_bh(root_lock);
4b3550ef 1113 nla_nest_cancel(skb, nest);
1da177e4
LT
1114 return -1;
1115}
1116
1117static int
87990467 1118htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1da177e4 1119{
87990467 1120 struct htb_class *cl = (struct htb_class *)arg;
1da177e4 1121
1da177e4
LT
1122 if (!cl->level && cl->un.leaf.q)
1123 cl->qstats.qlen = cl->un.leaf.q->q.qlen;
5343a7f8
ED
1124 cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
1125 cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
1da177e4
LT
1126
1127 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
d250a5f9 1128 gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
1da177e4
LT
1129 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1130 return -1;
1131
1132 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1133}
1134
1135static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
87990467 1136 struct Qdisc **old)
1da177e4 1137{
87990467 1138 struct htb_class *cl = (struct htb_class *)arg;
1da177e4 1139
5b9a9ccf
PM
1140 if (cl->level)
1141 return -EINVAL;
1142 if (new == NULL &&
3511c913 1143 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
5b9a9ccf
PM
1144 cl->common.classid)) == NULL)
1145 return -ENOBUFS;
1146
1147 sch_tree_lock(sch);
1148 *old = cl->un.leaf.q;
1149 cl->un.leaf.q = new;
1150 if (*old != NULL) {
1151 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1152 qdisc_reset(*old);
1da177e4 1153 }
5b9a9ccf
PM
1154 sch_tree_unlock(sch);
1155 return 0;
1da177e4
LT
1156}
1157
87990467 1158static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1da177e4 1159{
87990467 1160 struct htb_class *cl = (struct htb_class *)arg;
5b9a9ccf 1161 return !cl->level ? cl->un.leaf.q : NULL;
1da177e4
LT
1162}
1163
256d61b8
PM
1164static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1165{
1166 struct htb_class *cl = (struct htb_class *)arg;
1167
1168 if (cl->un.leaf.q->q.qlen == 0)
1169 htb_deactivate(qdisc_priv(sch), cl);
1170}
1171
1da177e4
LT
1172static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1173{
87990467
SH
1174 struct htb_class *cl = htb_find(classid, sch);
1175 if (cl)
1da177e4
LT
1176 cl->refcnt++;
1177 return (unsigned long)cl;
1178}
1179
160d5e10
JP
1180static inline int htb_parent_last_child(struct htb_class *cl)
1181{
1182 if (!cl->parent)
1183 /* the root class */
1184 return 0;
42077599 1185 if (cl->parent->children > 1)
160d5e10
JP
1186 /* not the last child */
1187 return 0;
160d5e10
JP
1188 return 1;
1189}
1190
3ba08b00
JP
1191static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1192 struct Qdisc *new_q)
160d5e10
JP
1193{
1194 struct htb_class *parent = cl->parent;
1195
547b792c 1196 WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
160d5e10 1197
3ba08b00
JP
1198 if (parent->cmode != HTB_CAN_SEND)
1199 htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1200
160d5e10
JP
1201 parent->level = 0;
1202 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1203 INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1204 parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
160d5e10
JP
1205 parent->tokens = parent->buffer;
1206 parent->ctokens = parent->cbuffer;
5343a7f8 1207 parent->t_c = ktime_to_ns(ktime_get());
160d5e10
JP
1208 parent->cmode = HTB_CAN_SEND;
1209}
1210
87990467 1211static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1da177e4 1212{
1da177e4 1213 if (!cl->level) {
547b792c 1214 WARN_ON(!cl->un.leaf.q);
1da177e4
LT
1215 qdisc_destroy(cl->un.leaf.q);
1216 }
ee39e10c 1217 gen_kill_estimator(&cl->bstats, &cl->rate_est);
ff31ab56 1218 tcf_destroy_chain(&cl->filter_list);
1da177e4
LT
1219 kfree(cl);
1220}
1221
87990467 1222static void htb_destroy(struct Qdisc *sch)
1da177e4
LT
1223{
1224 struct htb_sched *q = qdisc_priv(sch);
b67bfe0d 1225 struct hlist_node *next;
fbd8f137
PM
1226 struct htb_class *cl;
1227 unsigned int i;
1da177e4 1228
1224736d 1229 cancel_work_sync(&q->work);
fb983d45 1230 qdisc_watchdog_cancel(&q->watchdog);
1da177e4 1231 /* This line used to be after htb_destroy_class call below
cc7ec456
ED
1232 * and surprisingly it worked in 2.4. But it must precede it
1233 * because filters need their target class alive to be able to call
1234 * unbind_filter on it (without Oops).
1235 */
ff31ab56 1236 tcf_destroy_chain(&q->filter_list);
87990467 1237
f4c1f3e0 1238 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 1239 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
fbd8f137
PM
1240 tcf_destroy_chain(&cl->filter_list);
1241 }
f4c1f3e0 1242 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 1243 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
f4c1f3e0 1244 common.hnode)
fbd8f137
PM
1245 htb_destroy_class(sch, cl);
1246 }
f4c1f3e0 1247 qdisc_class_hash_destroy(&q->clhash);
1da177e4
LT
1248 __skb_queue_purge(&q->direct_queue);
1249}
1250
1251static int htb_delete(struct Qdisc *sch, unsigned long arg)
1252{
1253 struct htb_sched *q = qdisc_priv(sch);
87990467 1254 struct htb_class *cl = (struct htb_class *)arg;
256d61b8 1255 unsigned int qlen;
160d5e10
JP
1256 struct Qdisc *new_q = NULL;
1257 int last_child = 0;
1da177e4
LT
1258
1259 // TODO: why don't we allow deleting a subtree ? references ? does
1260 // the tc subsys guarantee us that in htb_destroy it holds no class
1261 // refs so that we can remove children safely there ?
42077599 1262 if (cl->children || cl->filter_cnt)
1da177e4 1263 return -EBUSY;
87990467 1264
160d5e10 1265 if (!cl->level && htb_parent_last_child(cl)) {
3511c913 1266 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
bb949fbd 1267 cl->parent->common.classid);
160d5e10
JP
1268 last_child = 1;
1269 }
1270
1da177e4 1271 sch_tree_lock(sch);
87990467 1272
814a175e 1273 if (!cl->level) {
256d61b8 1274 qlen = cl->un.leaf.q->q.qlen;
814a175e 1275 qdisc_reset(cl->un.leaf.q);
256d61b8 1276 qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
814a175e
PM
1277 }
1278
f4c1f3e0
PM
1279 /* delete from hash and active; remainder in destroy_class */
1280 qdisc_class_hash_remove(&q->clhash, &cl->common);
26b284de
JP
1281 if (cl->parent)
1282 cl->parent->children--;
c38c83cb 1283
1da177e4 1284 if (cl->prio_activity)
87990467 1285 htb_deactivate(q, cl);
1da177e4 1286
fbd8f137
PM
1287 if (cl->cmode != HTB_CAN_SEND)
1288 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1289
160d5e10 1290 if (last_child)
3ba08b00 1291 htb_parent_to_leaf(q, cl, new_q);
160d5e10 1292
7cd0a638
JP
1293 BUG_ON(--cl->refcnt == 0);
1294 /*
1295 * This shouldn't happen: we "hold" one cops->get() when called
1296 * from tc_ctl_tclass; the destroy method is done from cops->put().
1297 */
1da177e4
LT
1298
1299 sch_tree_unlock(sch);
1300 return 0;
1301}
1302
1303static void htb_put(struct Qdisc *sch, unsigned long arg)
1304{
87990467 1305 struct htb_class *cl = (struct htb_class *)arg;
1da177e4
LT
1306
1307 if (--cl->refcnt == 0)
87990467 1308 htb_destroy_class(sch, cl);
1da177e4
LT
1309}
1310
87990467 1311static int htb_change_class(struct Qdisc *sch, u32 classid,
1e90474c 1312 u32 parentid, struct nlattr **tca,
87990467 1313 unsigned long *arg)
1da177e4
LT
1314{
1315 int err = -EINVAL;
1316 struct htb_sched *q = qdisc_priv(sch);
87990467 1317 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1e90474c 1318 struct nlattr *opt = tca[TCA_OPTIONS];
6906f4ed 1319 struct nlattr *tb[TCA_HTB_MAX + 1];
1da177e4
LT
1320 struct tc_htb_opt *hopt;
1321
1322 /* extract all subattrs from opt attr */
cee63723
PM
1323 if (!opt)
1324 goto failure;
1325
e18434c4 1326 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
cee63723
PM
1327 if (err < 0)
1328 goto failure;
1329
1330 err = -EINVAL;
27a3421e 1331 if (tb[TCA_HTB_PARMS] == NULL)
1da177e4 1332 goto failure;
1da177e4 1333
87990467
SH
1334 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1335
1e90474c 1336 hopt = nla_data(tb[TCA_HTB_PARMS]);
196d97f6 1337 if (!hopt->rate.rate || !hopt->ceil.rate)
87990467 1338 goto failure;
1da177e4 1339
87990467 1340 if (!cl) { /* new class */
1da177e4 1341 struct Qdisc *new_q;
3696f625 1342 int prio;
ee39e10c 1343 struct {
1e90474c 1344 struct nlattr nla;
ee39e10c
PM
1345 struct gnet_estimator opt;
1346 } est = {
1e90474c
PM
1347 .nla = {
1348 .nla_len = nla_attr_size(sizeof(est.opt)),
1349 .nla_type = TCA_RATE,
ee39e10c
PM
1350 },
1351 .opt = {
1352 /* 4s interval, 16s averaging constant */
1353 .interval = 2,
1354 .ewma_log = 2,
1355 },
1356 };
3696f625 1357
1da177e4 1358 /* check for valid classid */
f64f9e71
JP
1359 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1360 htb_find(classid, sch))
1da177e4
LT
1361 goto failure;
1362
1363 /* check maximal depth */
1364 if (parent && parent->parent && parent->parent->level < 2) {
cc7ec456 1365 pr_err("htb: tree is too deep\n");
1da177e4
LT
1366 goto failure;
1367 }
1368 err = -ENOBUFS;
cc7ec456
ED
1369 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1370 if (!cl)
1da177e4 1371 goto failure;
87990467 1372
64153ce0
ED
1373 if (htb_rate_est || tca[TCA_RATE]) {
1374 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1375 qdisc_root_sleeping_lock(sch),
1376 tca[TCA_RATE] ? : &est.nla);
1377 if (err) {
1378 kfree(cl);
1379 goto failure;
1380 }
71bcb09a
SH
1381 }
1382
1da177e4 1383 cl->refcnt = 1;
42077599 1384 cl->children = 0;
1da177e4 1385 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
3696f625
SH
1386 RB_CLEAR_NODE(&cl->pq_node);
1387
1388 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1389 RB_CLEAR_NODE(&cl->node[prio]);
1da177e4
LT
1390
1391 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
cc7ec456
ED
1392 * so that can't be used inside of sch_tree_lock
1393 * -- thanks to Karlis Peisenieks
1394 */
3511c913 1395 new_q = qdisc_create_dflt(sch->dev_queue,
bb949fbd 1396 &pfifo_qdisc_ops, classid);
1da177e4
LT
1397 sch_tree_lock(sch);
1398 if (parent && !parent->level) {
256d61b8
PM
1399 unsigned int qlen = parent->un.leaf.q->q.qlen;
1400
1da177e4 1401 /* turn parent into inner node */
256d61b8
PM
1402 qdisc_reset(parent->un.leaf.q);
1403 qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
87990467
SH
1404 qdisc_destroy(parent->un.leaf.q);
1405 if (parent->prio_activity)
1406 htb_deactivate(q, parent);
1da177e4
LT
1407
1408 /* remove from evt list because of level change */
1409 if (parent->cmode != HTB_CAN_SEND) {
3696f625 1410 htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1da177e4
LT
1411 parent->cmode = HTB_CAN_SEND;
1412 }
1413 parent->level = (parent->parent ? parent->parent->level
87990467
SH
1414 : TC_HTB_MAXDEPTH) - 1;
1415 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1da177e4
LT
1416 }
1417 /* leaf (we) needs elementary qdisc */
1418 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1419
f4c1f3e0 1420 cl->common.classid = classid;
87990467 1421 cl->parent = parent;
1da177e4
LT
1422
1423 /* set class to be in HTB_CAN_SEND state */
b9a7afde
JP
1424 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1425 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
5343a7f8
ED
1426 cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */
1427 cl->t_c = ktime_to_ns(ktime_get());
1da177e4
LT
1428 cl->cmode = HTB_CAN_SEND;
1429
1430 /* attach to the hash list and parent's family */
f4c1f3e0 1431 qdisc_class_hash_insert(&q->clhash, &cl->common);
42077599
PM
1432 if (parent)
1433 parent->children++;
ee39e10c 1434 } else {
71bcb09a
SH
1435 if (tca[TCA_RATE]) {
1436 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1437 qdisc_root_sleeping_lock(sch),
1438 tca[TCA_RATE]);
1439 if (err)
1440 return err;
1441 }
87990467 1442 sch_tree_lock(sch);
ee39e10c 1443 }
1da177e4
LT
1444
1445 /* there used to be a nasty bug here, we have to check that the node
cc7ec456
ED
1446 * is really a leaf before changing cl->un.leaf !
1447 */
1da177e4 1448 if (!cl->level) {
196d97f6 1449 cl->quantum = hopt->rate.rate / q->rate2quantum;
c19f7a34 1450 if (!hopt->quantum && cl->quantum < 1000) {
cc7ec456 1451 pr_warning(
87990467 1452 "HTB: quantum of class %X is small. Consider r2q change.\n",
f4c1f3e0 1453 cl->common.classid);
c19f7a34 1454 cl->quantum = 1000;
1da177e4 1455 }
c19f7a34 1456 if (!hopt->quantum && cl->quantum > 200000) {
cc7ec456 1457 pr_warning(
87990467 1458 "HTB: quantum of class %X is big. Consider r2q change.\n",
f4c1f3e0 1459 cl->common.classid);
c19f7a34 1460 cl->quantum = 200000;
1da177e4
LT
1461 }
1462 if (hopt->quantum)
c19f7a34
JP
1463 cl->quantum = hopt->quantum;
1464 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1465 cl->prio = TC_HTB_NUMPRIO - 1;
1da177e4
LT
1466 }
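/* Worked example (illustrative values): hopt->rate.rate is in bytes per
 * second, so a 100 Mbit/s class with the default r2q of 10 would get
 * quantum = 12500000 / 10 = 1250000; that trips the "quantum ... is big"
 * warning above and is clamped to 200000 unless an explicit quantum (or a
 * larger r2q) is supplied from user space.
 */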
1467
01cb71d2
ED
1468 psched_ratecfg_precompute(&cl->rate, &hopt->rate);
1469 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
56b765b7 1470
324f5aa5
JP
1471 cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1472 cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer);
56b765b7 1473
1da177e4
LT
1474 sch_tree_unlock(sch);
1475
f4c1f3e0
PM
1476 qdisc_class_hash_grow(sch, &q->clhash);
1477
1da177e4
LT
1478 *arg = (unsigned long)cl;
1479 return 0;
1480
1481failure:
1da177e4
LT
1482 return err;
1483}
1484
1485static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1486{
1487 struct htb_sched *q = qdisc_priv(sch);
1488 struct htb_class *cl = (struct htb_class *)arg;
1489 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
3bf72957 1490
1da177e4
LT
1491 return fl;
1492}
1493
1494static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
87990467 1495 u32 classid)
1da177e4 1496{
87990467 1497 struct htb_class *cl = htb_find(classid, sch);
3bf72957 1498
1da177e4 1499 /*if (cl && !cl->level) return 0;
cc7ec456
ED
1500 * The line above used to be there to prevent attaching filters to
1501 * leaves. But at least tc_index filter uses this just to get class
1502 * for other reasons, so we have to allow it.
1503 * ----
1504 * 19.6.2002 As Werner explained it is ok - bind filter is just
1505 * another way to "lock" the class - unlike "get" this lock can
1506 * be broken by class during destroy IIUC.
1da177e4 1507 */
87990467
SH
1508 if (cl)
1509 cl->filter_cnt++;
1da177e4
LT
1510 return (unsigned long)cl;
1511}
1512
1513static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1514{
1da177e4 1515 struct htb_class *cl = (struct htb_class *)arg;
3bf72957 1516
87990467
SH
1517 if (cl)
1518 cl->filter_cnt--;
1da177e4
LT
1519}
1520
1521static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1522{
1523 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 1524 struct htb_class *cl;
f4c1f3e0 1525 unsigned int i;
1da177e4
LT
1526
1527 if (arg->stop)
1528 return;
1529
f4c1f3e0 1530 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 1531 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1da177e4
LT
1532 if (arg->count < arg->skip) {
1533 arg->count++;
1534 continue;
1535 }
1536 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1537 arg->stop = 1;
1538 return;
1539 }
1540 arg->count++;
1541 }
1542 }
1543}
1544
20fea08b 1545static const struct Qdisc_class_ops htb_class_ops = {
1da177e4
LT
1546 .graft = htb_graft,
1547 .leaf = htb_leaf,
256d61b8 1548 .qlen_notify = htb_qlen_notify,
1da177e4
LT
1549 .get = htb_get,
1550 .put = htb_put,
1551 .change = htb_change_class,
1552 .delete = htb_delete,
1553 .walk = htb_walk,
1554 .tcf_chain = htb_find_tcf,
1555 .bind_tcf = htb_bind_filter,
1556 .unbind_tcf = htb_unbind_filter,
1557 .dump = htb_dump_class,
1558 .dump_stats = htb_dump_class_stats,
1559};
1560
20fea08b 1561static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1da177e4
LT
1562 .cl_ops = &htb_class_ops,
1563 .id = "htb",
1564 .priv_size = sizeof(struct htb_sched),
1565 .enqueue = htb_enqueue,
1566 .dequeue = htb_dequeue,
77be155c 1567 .peek = qdisc_peek_dequeued,
1da177e4
LT
1568 .drop = htb_drop,
1569 .init = htb_init,
1570 .reset = htb_reset,
1571 .destroy = htb_destroy,
1da177e4
LT
1572 .dump = htb_dump,
1573 .owner = THIS_MODULE,
1574};
1575
1576static int __init htb_module_init(void)
1577{
87990467 1578 return register_qdisc(&htb_qdisc_ops);
1da177e4 1579}
87990467 1580static void __exit htb_module_exit(void)
1da177e4 1581{
87990467 1582 unregister_qdisc(&htb_qdisc_ops);
1da177e4 1583}
87990467 1584
1da177e4
LT
1585module_init(htb_module_init)
1586module_exit(htb_module_exit)
1587MODULE_LICENSE("GPL");