/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf always has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
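
/* Illustration (editor's note, not from the original author): assuming
 * TC_HTB_MAXDEPTH == 8 as defined in pkt_sched.h, a hierarchy
 * 1:1 -> 1:10 -> 1:101 is levelled as
 *
 *	1:1	level 7	(directly under the qdisc: TC_HTB_MAXDEPTH - 1)
 *	1:10	level 6	(one less than its parent)
 *	1:101	level 0	(a leaf is always level 0)
 *
 * so inner levels need not run contiguously down to the leaves.
 */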

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

static int htb_rate_est = 0; /* htb classes have a default rate estimator */
module_param(htb_rate_est, int, 0640);
MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
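
/* Example (editor's illustration): both knobs may be set at module load,
 * e.g. "modprobe sch_htb htb_hysteresis=1", or at run time via sysfs:
 * "echo 1 > /sys/module/sch_htb/parameters/htb_rate_est" (mode 0640,
 * so root only).
 */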

/* used internally to keep the status of a single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};
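
/* Illustration (editor's note): a class above its configured ceil is
 * HTB_CANT_SEND, one between rate and ceil is HTB_MAY_BORROW, and one
 * below its rate is HTB_CAN_SEND; see htb_class_mode() below.
 */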

struct htb_prio {
	union {
		struct rb_root	row;
		struct rb_root	feed;
	};
	struct rb_node	*ptr;
	/* When class changes from state 1->2 and disconnects from
	 * parent's feed then we lose the ptr value and start from the
	 * first child again. Here we store classid of the
	 * last valid ptr (used when ptr is NULL).
	 */
	u32		last_ptr_id;
};

/* interior & leaf nodes; props specific to leaves are marked L:
 * To reduce false sharing, place mostly read fields at beginning,
 * and mostly written ones at the end.
 */
struct htb_class {
	struct Qdisc_class_common common;
	struct psched_ratecfg	rate;
	struct psched_ratecfg	ceil;
	s64			buffer, cbuffer;/* token bucket depth/rate */
	s64			mbuffer;	/* max wait time */
	u32			prio;		/* these two are used only by leaves... */
	int			quantum;	/* but stored for parent-to-leaf return */

	struct tcf_proto	*filter_list;	/* class attached filters */
	int			filter_cnt;
	int			refcnt;		/* usage count of this class */

	int			level;		/* our level (see above) */
	unsigned int		children;
	struct htb_class	*parent;	/* parent class */

	struct gnet_stats_rate_est64 rate_est;

	/*
	 * Written often fields
	 */
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue	qstats;
	struct tc_htb_xstats	xstats;	/* our special stats */

	/* token bucket parameters */
	s64			tokens, ctokens;/* current number of tokens */
	s64			t_c;		/* checkpoint time */

	union {
		struct htb_class_leaf {
			struct list_head drop_list;
			int		deficit[TC_HTB_MAXDEPTH];
			struct Qdisc	*q;
		} leaf;
		struct htb_class_inner {
			struct htb_prio clprio[TC_HTB_NUMPRIO];
		} inner;
	} un;
	s64			pq_key;

	int			prio_activity;	/* for which prios are we active */
	enum htb_cmode		cmode;		/* current mode of the class */
	struct rb_node		pq_node;	/* node for event queue */
	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
};

struct htb_level {
	struct rb_root	wait_pq;
	struct htb_prio hprio[TC_HTB_NUMPRIO];
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	int			defcls;		/* class where unclassified flows go to */
	int			rate2quantum;	/* quant = rate / rate2quantum */

	/* filters for qdisc itself */
	struct tcf_proto	*filter_list;

#define HTB_WARN_TOOMANYEVENTS	0x1
	unsigned int		warned;	/* only one warning */
	int			direct_qlen;
	struct work_struct	work;

	/* non shaped skbs; let them go directly thru */
	struct sk_buff_head	direct_queue;
	long			direct_pkts;

	struct qdisc_watchdog	watchdog;

	s64			now;	/* cached dequeue time */
	struct list_head	drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */

	/* time of nearest event per level (row) */
	s64			near_ev_cache[TC_HTB_MAXDEPTH];

	int			row_mask[TC_HTB_MAXDEPTH];

	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful
 * then finish and return direct queue.
 */
#define HTB_DIRECT ((struct htb_class *)-1L)
1da177e4 205
87990467
SH
206static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
207 int *qerr)
1da177e4
LT
208{
209 struct htb_sched *q = qdisc_priv(sch);
210 struct htb_class *cl;
211 struct tcf_result res;
212 struct tcf_proto *tcf;
213 int result;
214
215 /* allow to select class by setting skb->priority to valid classid;
cc7ec456
ED
216 * note that nfmark can be used too by attaching filter fw with no
217 * rules in it
218 */
1da177e4 219 if (skb->priority == sch->handle)
87990467 220 return HTB_DIRECT; /* X:0 (direct flow) selected */
cc7ec456
ED
221 cl = htb_find(skb->priority, sch);
222 if (cl && cl->level == 0)
1da177e4
LT
223 return cl;
224
c27f339a 225 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
1da177e4
LT
226 tcf = q->filter_list;
227 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
228#ifdef CONFIG_NET_CLS_ACT
229 switch (result) {
230 case TC_ACT_QUEUED:
87990467 231 case TC_ACT_STOLEN:
378a2f09 232 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
1da177e4
LT
233 case TC_ACT_SHOT:
234 return NULL;
235 }
1da177e4 236#endif
cc7ec456
ED
237 cl = (void *)res.class;
238 if (!cl) {
1da177e4 239 if (res.classid == sch->handle)
87990467 240 return HTB_DIRECT; /* X:0 (direct flow) */
cc7ec456
ED
241 cl = htb_find(res.classid, sch);
242 if (!cl)
87990467 243 break; /* filter selected invalid classid */
1da177e4
LT
244 }
245 if (!cl->level)
87990467 246 return cl; /* we hit leaf; return it */
1da177e4
LT
247
248 /* we have got inner class; apply inner filter chain */
249 tcf = cl->filter_list;
250 }
251 /* classification failed; try to use default class */
87990467 252 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
1da177e4 253 if (!cl || cl->level)
87990467 254 return HTB_DIRECT; /* bad default .. this is safe bet */
1da177e4
LT
255 return cl;
256}
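
/* Example (editor's illustration of the paths above, hypothetical setup):
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit
 *	tc class add dev eth0 parent 1:1 classid 1:10 htb rate 30mbit
 *	tc class add dev eth0 parent 1:1 classid 1:20 htb rate 10mbit
 *	tc filter add dev eth0 parent 1: protocol ip u32 \
 *		match ip dport 80 0xffff flowid 1:10
 *
 * A packet to port 80 lands in leaf 1:10 via the u32 filter; unmatched
 * traffic falls back to the "default 20" leaf 1:20; an skb->priority of
 * 1:0 would select the direct queue (HTB_DIRECT).
 */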

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode at time cl->pq_key (in nanoseconds). Make sure that class
 * is not already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, s64 delay)
{
	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}

/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;
	struct htb_level *hlevel = &q->hlevel[cl->level];

	while (mask) {
		int prio = ffz(~mask);
		struct htb_prio *hprio = &hlevel->hprio[prio];

		mask &= ~(1 << prio);
		if (hprio->ptr == cl->node + prio)
			htb_next_rb_node(&hprio->ptr);

		htb_safe_rb_erase(cl->node + prio, &hprio->row);
		if (!hprio->row.rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating on. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.clprio[prio].feed.rb_node)
				/* parent already has its feed in use so that
				 * reset bit in mask as parent is already ok
				 */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.clprio[prio].ptr == cl->node + prio) {
				/* we are removing child which is pointed to from
				 * parent feed - forget the pointer but remember
				 * classid
				 */
				p->un.inner.clprio[prio].last_ptr_id = cl->common.classid;
				p->un.inner.clprio[prio].ptr = NULL;
			}

			htb_safe_rb_erase(cl->node + prio,
					  &p->un.inner.clprio[prio].feed);

			if (!p->un.inner.clprio[prio].feed.rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline s64 htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline s64 htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, s64 *diff)
{
	s64 toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}
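
/* Worked example (editor's note; hysteresis off, so both watermarks are 0):
 * tokens are kept in nanoseconds of transmission credit. With
 * cl->ctokens == -3000 and *diff == 2000, ctokens + diff == -1000 < 0, so
 * the class is HTB_CANT_SEND and *diff becomes 1000 - the wait until the
 * ceil bucket reaches zero. If the ceil check passes but tokens + diff is
 * still negative, the class is HTB_MAY_BORROW.
 */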

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change class's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->prio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with a non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int uninitialized_var(ret);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			return qdisc_drop(skb, sch);
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			cl->qstats.drops++;
		}
		return ret;
	} else {
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}
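
/* Worked example (editor's note): for a 1500 byte packet on a 1 mbit/s
 * class, psched_l2t_ns() charges 1500 * 8 / 10^6 s = 12 ms of credit;
 * the result is clamped to the configured burst above and held no lower
 * than 1 - mbuffer below.
 */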

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock than the event queue.
 * In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	s64 diff;

	while (cl) {
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;
	}
}
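
/* Example walk (editor's note): when a leaf sends by borrowing at an
 * ancestor's level, the loop charges ceil tokens on the leaf and on every
 * ancestor, but rate tokens only from the lending level upward; the leaf
 * counts a "borrow" and the ancestor at the lending level a "lend".
 */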

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq, q->now for too many events).
 * Note: Applied are events whose cl->pq_key <= q->now.
 */
static s64 htb_do_events(struct htb_sched *q, const int level,
			 unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	 * 1 to simplify things when jiffy is going to be incremented
	 * too soon
	 */
	unsigned long stop_at = start + 2;
	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;

	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		s64 diff;
		struct rb_node *p = rb_first(wait_pq);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, wait_pq);
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}

	/* too much load - let's continue after a break for scheduling */
	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
		pr_warning("htb: too many events!\n");
		q->warned |= HTB_WARN_TOOMANYEVENTS;
	}

	return q->now;
}
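
/* Example timeline (editor's note): a class throttled at time t with a
 * computed wait of 3 ms sits in wait_pq with pq_key == t + 3ms; the first
 * htb_do_events() running with q->now >= that key erases it from the
 * queue and htb_change_class_mode() reactivates its priorities.
 */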

/* Returns class->node+prio from id-tree where class's id is >= id. NULL
 * if no such one exists.
 */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find leaf where current feed pointer points to.
 */
static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!hprio->row.rb_node);
	sp->root = hprio->row.rb_node;
	sp->pptr = &hprio->ptr;
	sp->pid = &hprio->last_ptr_id;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			 * the original or next ptr
			 */
			*sp->pptr =
				htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
				 * can become out of date quickly
				 */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			struct htb_prio *clp;

			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			clp = &cl->un.inner.clprio[prio];
			(++sp)->root = clp->feed.rb_node;
			sp->pptr = &clp->ptr;
			sp->pid = &clp->last_ptr_id;
		}
	}
	WARN_ON(1);
	return NULL;
}
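
/* Note (editor's illustration): the stack above mirrors the path
 * row -> inner feed -> ... -> leaf. When a cached ptr was invalidated by
 * a class removal, htb_id_find_next_upper() resumes at the next classid
 * >= last_ptr_id, so the round robin continues close to where it left off.
 */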

/* dequeues packet at given priority and level; call only if
 * you are sure that there is active class at prio/level
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
					const int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	struct htb_level *hlevel = &q->hlevel[level];
	struct htb_prio *hprio = &hlevel->hprio[prio];

	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(hprio, prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		 * qdisc drops packets in enqueue routine or if someone used
		 * graft operation on the leaf since last dequeue;
		 * simply deactivate and skip such class
		 */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(hprio, prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;

		qdisc_warn_nonwc("htb", cl->un.leaf.q);
		htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr :
					 &q->hlevel[0].hprio[prio].ptr);
		cl = htb_lookup_leaf(hprio, prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		bstats_update(&cl->bstats, skb);
		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->un.leaf.deficit[level] < 0) {
			cl->un.leaf.deficit[level] += cl->quantum;
			htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr :
						 &q->hlevel[0].hprio[prio].ptr);
		}
		/* this used to be after charge_class but this constellation
		 * gives us slightly better performance
		 */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	s64 next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_unthrottled(sch);
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = ktime_to_ns(ktime_get());
	start_at = jiffies;

	next_event = q->now + 5LLU * NSEC_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		s64 event = q->near_ev_cache[level];

		if (q->now >= event) {
			event = htb_do_events(q, level, start_at);
			if (!event)
				event = q->now + NSEC_PER_SEC;
			q->near_ev_cache[level] = event;
		}

		if (next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);

			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}
	sch->qstats.overlimits++;
	if (likely(next_event > q->now)) {
		if (!test_bit(__QDISC_STATE_DEACTIVATED,
			      &qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
			ktime_t time = ns_to_ktime(next_event);
			qdisc_throttled(q->watchdog.qdisc);
			hrtimer_start(&q->watchdog.timer, time,
				      HRTIMER_MODE_ABS);
		}
	} else {
		schedule_work(&q->work);
	}
fin:
	return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->hlevel, 0, sizeof(q->hlevel));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
};

static void htb_work_func(struct work_struct *work)
{
	struct htb_sched *q = container_of(work, struct htb_sched, work);
	struct Qdisc *sch = q->watchdog.qdisc;

	__netif_schedule(qdisc_root(sch));
}

static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct tc_htb_glob *gopt;
	int err;
	int i;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
	if (err < 0)
		return err;

	if (!tb[TCA_HTB_INIT])
		return -EINVAL;

	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	qdisc_watchdog_init(&q->watchdog, sch);
	INIT_WORK(&q->work, htb_work_func);
	skb_queue_head_init(&q->direct_queue);

	if (tb[TCA_HTB_DIRECT_QLEN])
		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
	else {
		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
		if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
			q->direct_qlen = 2;
	}
	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}
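
/* Example (editor's note, hypothetical device): "tc qdisc add dev eth0
 * root handle 1: htb default 0 direct_qlen 100" supplies
 * TCA_HTB_DIRECT_QLEN; an older iproute2 omits the attribute and the
 * device tx_queue_len, floored at 2, is used instead.
 */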

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	spin_lock_bh(root_lock);

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	spin_lock_bh(root_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	psched_ratecfg_getrate(&opt.rate, &cl->rate);
	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	nla_nest_end(skb, nest);
	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
	cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->level)
		return -EINVAL;
	if (new == NULL &&
	    (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				     cl->common.classid)) == NULL)
		return -ENOBUFS;

	sch_tree_lock(sch);
	*old = cl->un.leaf.q;
	cl->un.leaf.q = new;
	if (*old != NULL) {
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);
	return 0;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return !cl->level ? cl->un.leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->un.leaf.q->q.qlen == 0)
		htb_deactivate(qdisc_priv(sch), cl);
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node,
				  &q->hlevel[parent->level].wait_pq);

	parent->level = 0;
	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = ktime_to_ns(ktime_get());
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->un.leaf.q);
		qdisc_destroy(cl->un.leaf.q);
	}
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	tcf_destroy_chain(&cl->filter_list);
	kfree(cl);
}

static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *next;
	struct htb_class *cl;
	unsigned int i;

	cancel_work_sync(&q->work);
	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after htb_destroy_class call below
	 * and surprisingly it worked in 2.4. But it must precede it
	 * because filters need their target class alive to be able to call
	 * unbind_filter on it (without an Oops).
	 */
	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
					  common.hnode)
			htb_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
	__skb_queue_purge(&q->direct_queue);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned int qlen;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	// TODO: why don't we allow deleting a subtree? references? does the
	// tc subsys guarantee us that in htb_destroy it holds no class
	// refs so that we can remove children safely there?
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					  cl->parent->common.classid);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level) {
		qlen = cl->un.leaf.q->q.qlen;
		qdisc_reset(cl->un.leaf.q);
		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
	}

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node,
				  &q->hlevel[cl->level].wait_pq);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);
	if (!hopt->rate.rate || !hopt->ceil.rate)
		goto failure;

	/* Keeping backward compatible with rate_table based iproute2 tc */
	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) {
		rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
		if (rtab)
			qdisc_put_rtab(rtab);
	}
	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) {
		ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
		if (ctab)
			qdisc_put_rtab(ctab);
	}

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr		nla;
			struct gnet_estimator	opt;
		} est = {
			.nla = {
				.nla_len	= nla_attr_size(sizeof(est.opt)),
				.nla_type	= TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval	= 2,
				.ewma_log	= 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
		    htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			pr_err("htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
		if (!cl)
			goto failure;

		if (htb_rate_est || tca[TCA_RATE]) {
			err = gen_new_estimator(&cl->bstats, &cl->rate_est,
						qdisc_root_sleeping_lock(sch),
						tca[TCA_RATE] ? : &est.nla);
			if (err) {
				kfree(cl);
				goto failure;
			}
		}

		cl->refcnt = 1;
		cl->children = 0;
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		 * so that can't be used inside of sch_tree_lock
		 * -- thanks to Karlis Peisenieks
		 */
		new_q = qdisc_create_dflt(sch->dev_queue,
					  &pfifo_qdisc_ops, classid);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			unsigned int qlen = parent->un.leaf.q->q.qlen;

			/* turn parent into inner node */
			qdisc_reset(parent->un.leaf.q);
			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->common.classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
		cl->t_c = ktime_to_ns(ktime_get());
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
	} else {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err)
				return err;
		}
		sch_tree_lock(sch);
	}

	/* it used to be a nasty bug here, we have to check that node
	 * is really leaf before changing cl->un.leaf !
	 */
	if (!cl->level) {
		cl->quantum = hopt->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->quantum < 1000) {
			pr_warning(
				"HTB: quantum of class %X is small. Consider r2q change.\n",
				cl->common.classid);
			cl->quantum = 1000;
		}
		if (!hopt->quantum && cl->quantum > 200000) {
			pr_warning(
				"HTB: quantum of class %X is big. Consider r2q change.\n",
				cl->common.classid);
			cl->quantum = 200000;
		}
		if (hopt->quantum)
			cl->quantum = hopt->quantum;
		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->prio = TC_HTB_NUMPRIO - 1;
	}

	psched_ratecfg_precompute(&cl->rate, &hopt->rate);
	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);

	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);

	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	return err;
}
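
/* Worked example (editor's note): with the default r2q of 10, "rate 8mbit"
 * (1000000 bytes/s) yields quantum = 100000 bytes, inside the accepted
 * 1000..200000 window; "rate 40kbit" (5000 bytes/s) would compute 500 and
 * be clamped to 1000 with the "is small" warning unless an explicit
 * quantum overrides it.
 */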

static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

	return fl;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	 * The line above used to be there to prevent attaching filters to
	 * leaves. But at least tc_index filter uses this just to get class
	 * for other reasons so that we have to allow for it.
	 * ----
	 * 19.6.2002 As Werner explained it is ok - bind filter is just
	 * another way to "lock" the class - unlike "get" this lock can
	 * be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.graft		=	htb_graft,
	.leaf		=	htb_leaf,
	.qlen_notify	=	htb_qlen_notify,
	.get		=	htb_get,
	.put		=	htb_put,
	.change		=	htb_change_class,
	.delete		=	htb_delete,
	.walk		=	htb_walk,
	.tcf_chain	=	htb_find_tcf,
	.bind_tcf	=	htb_bind_filter,
	.unbind_tcf	=	htb_unbind_filter,
	.dump		=	htb_dump_class,
	.dump_stats	=	htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.cl_ops		=	&htb_class_ops,
	.id		=	"htb",
	.priv_size	=	sizeof(struct htb_sched),
	.enqueue	=	htb_enqueue,
	.dequeue	=	htb_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	htb_drop,
	.init		=	htb_init,
	.reset		=	htb_reset,
	.destroy	=	htb_destroy,
	.dump		=	htb_dump,
	.owner		=	THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");