/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. Leaves ALWAYS have level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/

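/*
 * Illustrative sketch, not part of the original source: a small HTB
 * hierarchy as it would be built from user space with tc(8). Device,
 * handles and rates below are made-up example values.
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit ceil 100mbit
 *	tc class add dev eth0 parent 1:1 classid 1:10 htb rate 60mbit ceil 100mbit
 *	tc class add dev eth0 parent 1:1 classid 1:20 htb rate 40mbit ceil 100mbit
 *
 * 1:10 and 1:20 are leaves (level 0), 1:1 becomes an interior node once it
 * has children, and unclassified traffic falls back to the default class
 * 1:20.
 */
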
static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

static int htb_rate_est = 0; /* htb classes have a default rate estimator */
module_param(htb_rate_est, int, 0640);
MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");

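/*
 * Usage sketch (illustrative, not from the original source): both knobs can
 * be set at module load time or, since they are exported with mode 0640,
 * changed later through sysfs by root:
 *
 *	modprobe sch_htb htb_hysteresis=1
 *	echo 1 > /sys/module/sch_htb/parameters/htb_rate_est
 */
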
/* used internally to keep the status of a single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L:
 * To reduce false sharing, place mostly read fields at beginning,
 * and mostly written ones at the end.
 */
struct htb_class {
	struct Qdisc_class_common common;
	struct psched_ratecfg	rate;
	struct psched_ratecfg	ceil;
	s64			buffer, cbuffer;/* token bucket depth/rate */
	s64			mbuffer;	/* max wait time */
	int			prio;		/* these two are used only by leaves... */
	int			quantum;	/* but stored for parent-to-leaf return */

	struct tcf_proto	*filter_list;	/* class attached filters */
	int			filter_cnt;
	int			refcnt;		/* usage count of this class */

	int			level;		/* our level (see above) */
	unsigned int		children;
	struct htb_class	*parent;	/* parent class */

	struct gnet_stats_rate_est64 rate_est;

	/*
	 * Written often fields
	 */
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue	qstats;
	struct tc_htb_xstats	xstats;		/* our special stats */

	/* token bucket parameters */
	s64			tokens, ctokens;/* current number of tokens */
	s64			t_c;		/* checkpoint time */

	union {
		struct htb_class_leaf {
			struct Qdisc	*q;
			int		deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root	feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node	*ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When class changes from state 1->2 and disconnects from
			 * parent's feed we lose the ptr value and start from the
			 * first child again. Here we store the classid of the
			 * last valid ptr (used when ptr is NULL).
			 */
			u32		last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	s64			pq_key;

	int			prio_activity;	/* for which prios are we active */
	enum htb_cmode		cmode;		/* current mode of the class */
	struct rb_node		pq_node;	/* node for event queue */
	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	s64	near_ev_cache[TC_HTB_MAXDEPTH];

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;

	int	rate2quantum;	/* quant = rate / rate2quantum */
	s64	now;		/* cached dequeue time */
	struct qdisc_watchdog watchdog;

	/* non shaped skbs; let them go directly thru */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;

#define HTB_WARN_TOOMANYEVENTS	0x1
	unsigned int warned;	/* only one warning */
	struct work_struct work;
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If that is still
 * unsuccessful then we finish and return the direct queue.
 */
#define HTB_DIRECT ((struct htb_class *)-1L)

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow selecting a class by setting skb->priority to a valid classid;
	 * note that nfmark can be used too by attaching filter fw with no
	 * rules in it
	 */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	cl = htb_find(skb->priority, sch);
	if (cl && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (void *)res.class;
		if (!cl) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			cl = htb_find(res.classid, sch);
			if (!cl)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}

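/*
 * Usage sketch (illustrative, not part of the original source): a local
 * application can steer its traffic straight into a leaf of an HTB qdisc
 * with handle 1: by setting the socket priority to the classid, e.g. for
 * class "1:10" (minor 0x10):
 *
 *	int prio = 0x00010010;
 *	setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio));
 *
 * (priority values above 6 need CAP_NET_ADMIN). htb_classify() then
 * resolves the leaf via htb_find(skb->priority, sch) before consulting
 * any filters.
 */
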
/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, s64 delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}

/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);

		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);

		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for the priorities it participates in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use, so
				 * reset the bit in mask - parent is already ok
				 */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing a child which the parent
				 * feed points to - forget the pointer but
				 * remember the classid
				 */
				p->un.inner.last_ptr_id[prio] = cl->common.classid;
				p->un.inner.ptr[prio] = NULL;
			}

			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline s64 htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline s64 htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * It is also worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0; rather, there can be hysteresis over the
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, s64 *diff)
{
	s64 toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}

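/*
 * Worked example (illustrative numbers, not from the original source): with
 * htb_hysteresis enabled and cl->cbuffer worth 100000 ns of tokens, a class
 * that is currently allowed to send keeps htb_lowater() at -cbuffer, so
 * ctokens + diff may fall as low as -100000 before the class flips to
 * HTB_CANT_SEND; with hysteresis disabled the threshold is 0 and the flip
 * happens as soon as ctokens + diff goes negative.
 */
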
/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->prio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words, it can't be called
 * with a non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int uninitialized_var(ret);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			return qdisc_drop(skb, sch);
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			cl->qstats.drops++;
		}
		return ret;
	} else {
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such a case we remove the class from the event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	s64 diff;

	while (cl) {
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq, q->now for too many events).
 * Note: only events with cl->pq_key <= q->now are applied.
 */
static s64 htb_do_events(struct htb_sched *q, int level,
			 unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	 * 1 to simplify things when jiffy is going to be incremented
	 * too soon
	 */
	unsigned long stop_at = start + 2;
	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		s64 diff;
		struct rb_node *p = rb_first(&q->wait_pq[level]);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, q->wait_pq + level);
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}

	/* too much load - let's continue after a break for scheduling */
	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
		pr_warning("htb: too many events!\n");
		q->warned |= HTB_WARN_TOOMANYEVENTS;
	}

	return q->now;
}

/* Returns class->node+prio from id-tree where the class's id is >= id.
 * NULL if no such one exists.
 */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
		    rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find the leaf that the current feed pointers point to.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 *pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			 * the original or next ptr
			 */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now, so remove this hint as it
				 * can become out of date quickly
				 */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
 * you are sure that there is an active class at prio/level
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		 * qdisc drops packets in enqueue routine or if someone used
		 * graft operation on the leaf since last dequeue;
		 * simply deactivate and skip such class
		 */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;

		qdisc_warn_nonwc("htb", cl->un.leaf.q);
		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		bstats_update(&cl->bstats, skb);
		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->un.leaf.deficit[level] < 0) {
			cl->un.leaf.deficit[level] += cl->quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->ptr[0]) + prio);
		}
		/* this used to be after charge_class but this constellation
		 * gives us slightly better performance
		 */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

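/*
 * Worked example of the deficit round robin above (illustrative values,
 * not from the original source): a freshly activated leaf with
 * cl->quantum = 1500 starts with deficit[level] = 0. Dequeuing a 1000 byte
 * packet drives the deficit to -1000, so it is topped up by the quantum to
 * 500 and the round-robin pointer advances to the next leaf. When the leaf
 * is served again, an 800 byte packet leaves the deficit at -300, causing
 * another top-up to 1200 and another advance.
 */
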
static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	s64 next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_unthrottled(sch);
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = ktime_to_ns(ktime_get());
	start_at = jiffies;

	next_event = q->now + 5LLU * NSEC_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		s64 event;

		if (q->now >= q->near_ev_cache[level]) {
			event = htb_do_events(q, level, start_at);
			if (!event)
				event = q->now + NSEC_PER_SEC;
			q->near_ev_cache[level] = event;
		} else
			event = q->near_ev_cache[level];

		if (next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);

			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}
	sch->qstats.overlimits++;
	if (likely(next_event > q->now)) {
		if (!test_bit(__QDISC_STATE_DEACTIVATED,
			      &qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
			ktime_t time = ns_to_ktime(next_event);
			qdisc_throttled(q->watchdog.qdisc);
			hrtimer_start(&q->watchdog.timer, time,
				      HRTIMER_MODE_ABS);
		}
	} else {
		schedule_work(&q->work);
	}
fin:
	return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;

		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
};

static void htb_work_func(struct work_struct *work)
{
	struct htb_sched *q = container_of(work, struct htb_sched, work);
	struct Qdisc *sch = q->watchdog.qdisc;

	__netif_schedule(qdisc_root(sch));
}

static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct tc_htb_glob *gopt;
	int err;
	int i;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
	if (err < 0)
		return err;

	if (!tb[TCA_HTB_INIT])
		return -EINVAL;

	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	qdisc_watchdog_init(&q->watchdog, sch);
	INIT_WORK(&q->work, htb_work_func);
	skb_queue_head_init(&q->direct_queue);

	if (tb[TCA_HTB_DIRECT_QLEN])
		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
	else {
		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
		if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
			q->direct_qlen = 2;
	}
	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	spin_lock_bh(root_lock);

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	spin_lock_bh(root_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	psched_ratecfg_getrate(&opt.rate, &cl->rate);
	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	nla_nest_end(skb, nest);
	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
	cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->level)
		return -EINVAL;
	if (new == NULL &&
	    (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				     cl->common.classid)) == NULL)
		return -ENOBUFS;

	sch_tree_lock(sch);
	*old = cl->un.leaf.q;
	cl->un.leaf.q = new;
	if (*old != NULL) {
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);
	return 0;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return !cl->level ? cl->un.leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->un.leaf.q->q.qlen == 0)
		htb_deactivate(qdisc_priv(sch), cl);
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);

	parent->level = 0;
	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = ktime_to_ns(ktime_get());
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->un.leaf.q);
		qdisc_destroy(cl->un.leaf.q);
	}
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	tcf_destroy_chain(&cl->filter_list);
	kfree(cl);
}

static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *next;
	struct htb_class *cl;
	unsigned int i;

	cancel_work_sync(&q->work);
	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after htb_destroy_class call below
	 * and surprisingly it worked in 2.4. But it must precede it
	 * because filters need their target class alive to be able to call
	 * unbind_filter on it (without an Oops).
	 */
	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
					  common.hnode)
			htb_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
	__skb_queue_purge(&q->direct_queue);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned int qlen;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	/* TODO: why don't we allow deleting a subtree? references? does
	 * the tc subsys guarantee us that in htb_destroy it holds no class
	 * refs so that we can remove children safely there?
	 */
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					  cl->parent->common.classid);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level) {
		qlen = cl->un.leaf.q->q.qlen;
		qdisc_reset(cl->un.leaf.q);
		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
	}

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);
	if (!hopt->rate.rate || !hopt->ceil.rate)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr		nla;
			struct gnet_estimator	opt;
		} est = {
			.nla = {
				.nla_len	= nla_attr_size(sizeof(est.opt)),
				.nla_type	= TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval	= 2,
				.ewma_log	= 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
		    htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			pr_err("htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
		if (!cl)
			goto failure;

		if (htb_rate_est || tca[TCA_RATE]) {
			err = gen_new_estimator(&cl->bstats, &cl->rate_est,
						qdisc_root_sleeping_lock(sch),
						tca[TCA_RATE] ? : &est.nla);
			if (err) {
				kfree(cl);
				goto failure;
			}
		}

		cl->refcnt = 1;
		cl->children = 0;
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		 * so that it can't be used inside of sch_tree_lock
		 * -- thanks to Karlis Peisenieks
		 */
		new_q = qdisc_create_dflt(sch->dev_queue,
					  &pfifo_qdisc_ops, classid);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			unsigned int qlen = parent->un.leaf.q->q.qlen;

			/* turn parent into inner node */
			qdisc_reset(parent->un.leaf.q);
			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->common.classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
		cl->t_c = ktime_to_ns(ktime_get());
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
	} else {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err)
				return err;
		}
		sch_tree_lock(sch);
	}

	/* it used to be a nasty bug here, we have to check that node
	 * is really a leaf before changing cl->un.leaf !
	 */
	if (!cl->level) {
		cl->quantum = hopt->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->quantum < 1000) {
			pr_warning(
			       "HTB: quantum of class %X is small. Consider r2q change.\n",
			       cl->common.classid);
			cl->quantum = 1000;
		}
		if (!hopt->quantum && cl->quantum > 200000) {
			pr_warning(
			       "HTB: quantum of class %X is big. Consider r2q change.\n",
			       cl->common.classid);
			cl->quantum = 200000;
		}
		if (hopt->quantum)
			cl->quantum = hopt->quantum;
		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->prio = TC_HTB_NUMPRIO - 1;
	}

	psched_ratecfg_precompute(&cl->rate, &hopt->rate);
	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);

	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);

	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	return err;
}

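/*
 * Example of the quantum heuristic above (illustrative values, not part of
 * the original source): with tc's default r2q of 10, a class created with
 * "rate 100mbit" (12500000 bytes/sec) gets quantum = 1250000, which exceeds
 * 200000 and is clamped with a warning; passing an explicit "quantum" or a
 * larger r2q on the tc command line avoids the clamp.
 */
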
static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

	return fl;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	 * The line above used to be there to prevent attaching filters to
	 * leaves. But at least tc_index filter uses this just to get class
	 * for other reasons so that we have to allow for it.
	 * ----
	 * 19.6.2002 As Werner explained it is ok - bind filter is just
	 * another way to "lock" the class - unlike "get" this lock can
	 * be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.graft		=	htb_graft,
	.leaf		=	htb_leaf,
	.qlen_notify	=	htb_qlen_notify,
	.get		=	htb_get,
	.put		=	htb_put,
	.change		=	htb_change_class,
	.delete		=	htb_delete,
	.walk		=	htb_walk,
	.tcf_chain	=	htb_find_tcf,
	.bind_tcf	=	htb_bind_filter,
	.unbind_tcf	=	htb_unbind_filter,
	.dump		=	htb_dump_class,
	.dump_stats	=	htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.cl_ops		=	&htb_class_ops,
	.id		=	"htb",
	.priv_size	=	sizeof(struct htb_sched),
	.enqueue	=	htb_enqueue,
	.dequeue	=	htb_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	htb_drop,
	.init		=	htb_init,
	.reset		=	htb_reset,
	.destroy	=	htb_destroy,
	.dump		=	htb_dump,
	.owner		=	THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");