git.proxmox.com Git - mirror_ubuntu-artful-kernel.git - blame: net/sched/sch_htb.c
87990467 1/*
1da177e4
LT
2 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Martin Devera, <devik@cdi.cz>
10 *
11 * Credits (in time order) for older HTB versions:
12 * Stef Coene <stef.coene@docum.org>
13 * HTB support at LARTC mailing list
10297b99 14 * Ondrej Kraus, <krauso@barr.cz>
1da177e4
LT
15 * found missing INIT_QDISC(htb)
16 * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
17 * helped a lot to locate nasty class stall bug
18 * Andi Kleen, Jamal Hadi, Bert Hubert
19 * code review and helpful comments on shaping
20 * Tomasz Wrona, <tw@eter.tym.pl>
21 * created test case so that I was able to fix nasty bug
22 * Wilfried Weissmann
23 * spotted bug in dequeue code and helped with fix
24 * Jiri Fojtasek
25 * fixed requeue routine
26 * and many others. thanks.
1da177e4 27 */
1da177e4 28#include <linux/module.h>
47083fc0 29#include <linux/moduleparam.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/kernel.h>
1da177e4 32#include <linux/string.h>
1da177e4 33#include <linux/errno.h>
1da177e4
LT
34#include <linux/skbuff.h>
35#include <linux/list.h>
36#include <linux/compiler.h>
0ba48053 37#include <linux/rbtree.h>
1224736d 38#include <linux/workqueue.h>
5a0e3ad6 39#include <linux/slab.h>
dc5fc579 40#include <net/netlink.h>
292f1c7f 41#include <net/sch_generic.h>
1da177e4 42#include <net/pkt_sched.h>
1da177e4
LT
43
44/* HTB algorithm.
45 Author: devik@cdi.cz
46 ========================================================================
47 HTB is like TBF with multiple classes. It is also similar to CBQ because
10297b99 48 it allows assigning a priority to each class in the hierarchy.
1da177e4
LT
49 In fact it is another implementation of Floyd's formal sharing.
50
51 Levels:
10297b99 52 Each class is assigned a level. A leaf ALWAYS has level 0 and root
1da177e4
LT
 53 classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
54 one less than their parent.
55*/
56
47083fc0 57static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
87990467 58#define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */
1da177e4
LT
59
60#if HTB_VER >> 16 != TC_HTB_PROTOVER
61#error "Mismatched sch_htb.c and pkt_sch.h"
62#endif
63
47083fc0
JDB
64/* Module parameter and sysfs export */
65module_param (htb_hysteresis, int, 0640);
66MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
67
64153ce0
ED
68static int htb_rate_est = 0; /* htb classes have a default rate estimator */
69module_param(htb_rate_est, int, 0640);
70MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
71
1da177e4
LT
 72/* used internally to keep the status of a single class */
73enum htb_cmode {
87990467
SH
74 HTB_CANT_SEND, /* class can't send and can't borrow */
75 HTB_MAY_BORROW, /* class can't send but may borrow */
76 HTB_CAN_SEND /* class can send */
1da177e4
LT
77};
78
c9364636
ED
79struct htb_prio {
80 union {
81 struct rb_root row;
82 struct rb_root feed;
83 };
84 struct rb_node *ptr;
85 /* When class changes from state 1->2 and disconnects from
 86 * parent's feed then we lose the ptr value and start from the
87 * first child again. Here we store classid of the
88 * last valid ptr (used when ptr is NULL).
89 */
90 u32 last_ptr_id;
91};
92
ca4ec90b
ED
93/* interior & leaf nodes; props specific to leaves are marked L:
94 * To reduce false sharing, place mostly read fields at beginning,
95 * and mostly written ones at the end.
96 */
87990467 97struct htb_class {
f4c1f3e0 98 struct Qdisc_class_common common;
ca4ec90b
ED
99 struct psched_ratecfg rate;
100 struct psched_ratecfg ceil;
101 s64 buffer, cbuffer;/* token bucket depth/rate */
102 s64 mbuffer; /* max wait time */
cbd37556 103 u32 prio; /* these two are used only by leaves... */
ca4ec90b
ED
104 int quantum; /* but stored for parent-to-leaf return */
105
25d8c0d5 106 struct tcf_proto __rcu *filter_list; /* class attached filters */
ca4ec90b
ED
107 int filter_cnt;
108 int refcnt; /* usage count of this class */
109
110 int level; /* our level (see above) */
111 unsigned int children;
112 struct htb_class *parent; /* parent class */
113
45203a3b 114 struct gnet_stats_rate_est64 rate_est;
1da177e4 115
ca4ec90b
ED
116 /*
117 * Written often fields
118 */
119 struct gnet_stats_basic_packed bstats;
120 struct gnet_stats_queue qstats;
121 struct tc_htb_xstats xstats; /* our special stats */
87990467 122
ca4ec90b
ED
123 /* token bucket parameters */
124 s64 tokens, ctokens;/* current number of tokens */
125 s64 t_c; /* checkpoint time */
c19f7a34 126
87990467
SH
127 union {
128 struct htb_class_leaf {
87990467 129 struct list_head drop_list;
c9364636
ED
130 int deficit[TC_HTB_MAXDEPTH];
131 struct Qdisc *q;
87990467
SH
132 } leaf;
133 struct htb_class_inner {
c9364636 134 struct htb_prio clprio[TC_HTB_NUMPRIO];
87990467
SH
135 } inner;
136 } un;
ca4ec90b 137 s64 pq_key;
87990467 138
ca4ec90b
ED
139 int prio_activity; /* for which prios are we active */
140 enum htb_cmode cmode; /* current mode of the class */
141 struct rb_node pq_node; /* node for event queue */
142 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
1da177e4
LT
143};
144
c9364636
ED
145struct htb_level {
146 struct rb_root wait_pq;
147 struct htb_prio hprio[TC_HTB_NUMPRIO];
148};
149
87990467 150struct htb_sched {
f4c1f3e0 151 struct Qdisc_class_hash clhash;
c9364636
ED
152 int defcls; /* class where unclassified flows go to */
153 int rate2quantum; /* quant = rate / rate2quantum */
1da177e4 154
c9364636 155 /* filters for qdisc itself */
25d8c0d5 156 struct tcf_proto __rcu *filter_list;
1da177e4 157
c9364636
ED
158#define HTB_WARN_TOOMANYEVENTS 0x1
159 unsigned int warned; /* only one warning */
160 int direct_qlen;
161 struct work_struct work;
1da177e4 162
c9364636
ED
163 /* non shaped skbs; let them go directly thru */
164 struct sk_buff_head direct_queue;
165 long direct_pkts;
1da177e4 166
c9364636 167 struct qdisc_watchdog watchdog;
1da177e4 168
c9364636
ED
169 s64 now; /* cached dequeue time */
170 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
1da177e4 171
c9364636
ED
172 /* time of nearest event per level (row) */
173 s64 near_ev_cache[TC_HTB_MAXDEPTH];
87990467 174
c9364636 175 int row_mask[TC_HTB_MAXDEPTH];
e82181de 176
c9364636 177 struct htb_level hlevel[TC_HTB_MAXDEPTH];
1da177e4
LT
178};
179
1da177e4 180/* find class in global hash table using given handle */
87990467 181static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
1da177e4
LT
182{
183 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 184 struct Qdisc_class_common *clc;
0cef296d 185
f4c1f3e0
PM
186 clc = qdisc_class_find(&q->clhash, handle);
187 if (clc == NULL)
1da177e4 188 return NULL;
f4c1f3e0 189 return container_of(clc, struct htb_class, common);
1da177e4
LT
190}
191
192/**
193 * htb_classify - classify a packet into class
194 *
195 * It returns NULL if the packet should be dropped or -1 if the packet
196 * should be passed directly thru. In all other cases leaf class is returned.
 197 * We allow direct class selection by classid in priority. Then we examine
198 * filters in qdisc and in inner nodes (if higher filter points to the inner
199 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
10297b99 200 * internal fifo (direct). These packets then go directly thru. If we still
25985edc 201 * have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful
1da177e4
LT
202 * then finish and return direct queue.
203 */
cc7ec456 204#define HTB_DIRECT ((struct htb_class *)-1L)
1da177e4 205
87990467
SH
206static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
207 int *qerr)
1da177e4
LT
208{
209 struct htb_sched *q = qdisc_priv(sch);
210 struct htb_class *cl;
211 struct tcf_result res;
212 struct tcf_proto *tcf;
213 int result;
214
 215 /* allow selecting a class by setting skb->priority to a valid classid;
cc7ec456
ED
216 * note that nfmark can be used too by attaching filter fw with no
217 * rules in it
218 */
1da177e4 219 if (skb->priority == sch->handle)
87990467 220 return HTB_DIRECT; /* X:0 (direct flow) selected */
cc7ec456 221 cl = htb_find(skb->priority, sch);
29824310
HM
222 if (cl) {
223 if (cl->level == 0)
224 return cl;
225 /* Start with inner filter chain if a non-leaf class is selected */
25d8c0d5 226 tcf = rcu_dereference_bh(cl->filter_list);
29824310 227 } else {
25d8c0d5 228 tcf = rcu_dereference_bh(q->filter_list);
29824310 229 }
1da177e4 230
c27f339a 231 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
3b3ae880 232 while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
1da177e4
LT
233#ifdef CONFIG_NET_CLS_ACT
234 switch (result) {
235 case TC_ACT_QUEUED:
87990467 236 case TC_ACT_STOLEN:
378a2f09 237 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
1da177e4
LT
238 case TC_ACT_SHOT:
239 return NULL;
240 }
1da177e4 241#endif
cc7ec456
ED
242 cl = (void *)res.class;
243 if (!cl) {
1da177e4 244 if (res.classid == sch->handle)
87990467 245 return HTB_DIRECT; /* X:0 (direct flow) */
cc7ec456
ED
246 cl = htb_find(res.classid, sch);
247 if (!cl)
87990467 248 break; /* filter selected invalid classid */
1da177e4
LT
249 }
250 if (!cl->level)
87990467 251 return cl; /* we hit leaf; return it */
1da177e4
LT
252
253 /* we have got inner class; apply inner filter chain */
25d8c0d5 254 tcf = rcu_dereference_bh(cl->filter_list);
1da177e4
LT
255 }
256 /* classification failed; try to use default class */
87990467 257 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
1da177e4 258 if (!cl || cl->level)
87990467 259 return HTB_DIRECT; /* bad default .. this is safe bet */
1da177e4
LT
260 return cl;
261}
262
1da177e4
LT
263/**
264 * htb_add_to_id_tree - adds class to the round robin list
265 *
266 * Routine adds class to the list (actually tree) sorted by classid.
267 * Make sure that class is not already on such list for given prio.
268 */
87990467
SH
269static void htb_add_to_id_tree(struct rb_root *root,
270 struct htb_class *cl, int prio)
1da177e4
LT
271{
272 struct rb_node **p = &root->rb_node, *parent = NULL;
3bf72957 273
1da177e4 274 while (*p) {
87990467
SH
275 struct htb_class *c;
276 parent = *p;
1da177e4 277 c = rb_entry(parent, struct htb_class, node[prio]);
3bf72957 278
f4c1f3e0 279 if (cl->common.classid > c->common.classid)
1da177e4 280 p = &parent->rb_right;
87990467 281 else
1da177e4
LT
282 p = &parent->rb_left;
283 }
284 rb_link_node(&cl->node[prio], parent, p);
285 rb_insert_color(&cl->node[prio], root);
286}
287
288/**
289 * htb_add_to_wait_tree - adds class to the event queue with delay
290 *
 291 * The class is added to the priority event queue to indicate that the class will
 292 * change its mode at time cl->pq_key. Make sure that the class is not
293 * already in the queue.
294 */
87990467 295static void htb_add_to_wait_tree(struct htb_sched *q,
56b765b7 296 struct htb_class *cl, s64 delay)
1da177e4 297{
c9364636 298 struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
3bf72957 299
fb983d45
PM
300 cl->pq_key = q->now + delay;
301 if (cl->pq_key == q->now)
1da177e4
LT
302 cl->pq_key++;
303
304 /* update the nearest event cache */
fb983d45 305 if (q->near_ev_cache[cl->level] > cl->pq_key)
1da177e4 306 q->near_ev_cache[cl->level] = cl->pq_key;
87990467 307
1da177e4 308 while (*p) {
87990467
SH
309 struct htb_class *c;
310 parent = *p;
1da177e4 311 c = rb_entry(parent, struct htb_class, pq_node);
fb983d45 312 if (cl->pq_key >= c->pq_key)
1da177e4 313 p = &parent->rb_right;
87990467 314 else
1da177e4
LT
315 p = &parent->rb_left;
316 }
317 rb_link_node(&cl->pq_node, parent, p);
c9364636 318 rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
1da177e4
LT
319}
320
321/**
322 * htb_next_rb_node - finds next node in binary tree
323 *
324 * When we are past last key we return NULL.
325 * Average complexity is 2 steps per call.
326 */
3696f625 327static inline void htb_next_rb_node(struct rb_node **n)
1da177e4
LT
328{
329 *n = rb_next(*n);
330}
331
332/**
333 * htb_add_class_to_row - add class to its row
334 *
335 * The class is added to row at priorities marked in mask.
336 * It does nothing if mask == 0.
337 */
87990467
SH
338static inline void htb_add_class_to_row(struct htb_sched *q,
339 struct htb_class *cl, int mask)
1da177e4 340{
1da177e4
LT
341 q->row_mask[cl->level] |= mask;
342 while (mask) {
343 int prio = ffz(~mask);
344 mask &= ~(1 << prio);
c9364636 345 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
1da177e4
LT
346 }
347}
348
3696f625
SH
349/* If this triggers, it is a bug in this code, but it need not be fatal */
350static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
351{
81771b3b 352 if (RB_EMPTY_NODE(rb)) {
3696f625
SH
353 WARN_ON(1);
354 } else {
355 rb_erase(rb, root);
356 RB_CLEAR_NODE(rb);
357 }
358}
359
360
1da177e4
LT
361/**
362 * htb_remove_class_from_row - removes class from its row
363 *
364 * The class is removed from row at priorities marked in mask.
365 * It does nothing if mask == 0.
366 */
87990467
SH
367static inline void htb_remove_class_from_row(struct htb_sched *q,
368 struct htb_class *cl, int mask)
1da177e4
LT
369{
370 int m = 0;
c9364636 371 struct htb_level *hlevel = &q->hlevel[cl->level];
3bf72957 372
1da177e4
LT
373 while (mask) {
374 int prio = ffz(~mask);
c9364636 375 struct htb_prio *hprio = &hlevel->hprio[prio];
3696f625 376
1da177e4 377 mask &= ~(1 << prio);
c9364636
ED
378 if (hprio->ptr == cl->node + prio)
379 htb_next_rb_node(&hprio->ptr);
3696f625 380
c9364636
ED
381 htb_safe_rb_erase(cl->node + prio, &hprio->row);
382 if (!hprio->row.rb_node)
1da177e4
LT
383 m |= 1 << prio;
384 }
1da177e4
LT
385 q->row_mask[cl->level] &= ~m;
386}
387
388/**
 389 * htb_activate_prios - creates active class's feed chain
390 *
391 * The class is connected to ancestors and/or appropriate rows
10297b99 392 * for priorities it is participating in. cl->cmode must be new
1da177e4
LT
393 * (activated) mode. It does nothing if cl->prio_activity == 0.
394 */
87990467 395static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
1da177e4
LT
396{
397 struct htb_class *p = cl->parent;
87990467 398 long m, mask = cl->prio_activity;
1da177e4
LT
399
400 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
87990467
SH
401 m = mask;
402 while (m) {
1da177e4
LT
403 int prio = ffz(~m);
404 m &= ~(1 << prio);
87990467 405
c9364636 406 if (p->un.inner.clprio[prio].feed.rb_node)
1da177e4 407 /* parent already has its feed in use so that
cc7ec456
ED
408 * reset bit in mask as parent is already ok
409 */
1da177e4 410 mask &= ~(1 << prio);
87990467 411
c9364636 412 htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio);
1da177e4 413 }
1da177e4 414 p->prio_activity |= mask;
87990467
SH
415 cl = p;
416 p = cl->parent;
3bf72957 417
1da177e4
LT
418 }
419 if (cl->cmode == HTB_CAN_SEND && mask)
87990467 420 htb_add_class_to_row(q, cl, mask);
1da177e4
LT
421}
422
423/**
424 * htb_deactivate_prios - remove class from feed chain
425 *
10297b99 426 * cl->cmode must represent old mode (before deactivation). It does
1da177e4
LT
427 * nothing if cl->prio_activity == 0. Class is removed from all feed
428 * chains and rows.
429 */
430static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
431{
432 struct htb_class *p = cl->parent;
87990467 433 long m, mask = cl->prio_activity;
1da177e4
LT
434
435 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
87990467
SH
436 m = mask;
437 mask = 0;
1da177e4
LT
438 while (m) {
439 int prio = ffz(~m);
440 m &= ~(1 << prio);
87990467 441
c9364636 442 if (p->un.inner.clprio[prio].ptr == cl->node + prio) {
1da177e4 443 /* we are removing child which is pointed to from
cc7ec456
ED
444 * parent feed - forget the pointer but remember
445 * classid
446 */
c9364636
ED
447 p->un.inner.clprio[prio].last_ptr_id = cl->common.classid;
448 p->un.inner.clprio[prio].ptr = NULL;
1da177e4 449 }
87990467 450
c9364636
ED
451 htb_safe_rb_erase(cl->node + prio,
452 &p->un.inner.clprio[prio].feed);
87990467 453
c9364636 454 if (!p->un.inner.clprio[prio].feed.rb_node)
1da177e4
LT
455 mask |= 1 << prio;
456 }
3bf72957 457
1da177e4 458 p->prio_activity &= ~mask;
87990467
SH
459 cl = p;
460 p = cl->parent;
3bf72957 461
1da177e4 462 }
87990467
SH
463 if (cl->cmode == HTB_CAN_SEND && mask)
464 htb_remove_class_from_row(q, cl, mask);
1da177e4
LT
465}
466
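/* Hysteresis watermarks used by htb_class_mode() below: with htb_hysteresis
 * enabled, a class that is already allowed to send keeps its mode until
 * tokens/ctokens fall below -buffer/-cbuffer instead of 0, which limits the
 * number of mode transitions. With hysteresis off both watermarks are 0.
 */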
56b765b7 467static inline s64 htb_lowater(const struct htb_class *cl)
18a63e86 468{
47083fc0
JDB
469 if (htb_hysteresis)
470 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
471 else
472 return 0;
18a63e86 473}
56b765b7 474static inline s64 htb_hiwater(const struct htb_class *cl)
18a63e86 475{
47083fc0
JDB
476 if (htb_hysteresis)
477 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
478 else
479 return 0;
18a63e86 480}
47083fc0 481
18a63e86 482
1da177e4
LT
483/**
484 * htb_class_mode - computes and returns current class mode
485 *
486 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
487 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
10297b99 488 * from now to time when cl will change its state.
1da177e4 489 * Also it is worth to note that class mode doesn't change simply
10297b99 490 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
1da177e4
LT
491 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
492 * mode transitions per time unit. The speed gain is about 1/6.
493 */
87990467 494static inline enum htb_cmode
56b765b7 495htb_class_mode(struct htb_class *cl, s64 *diff)
1da177e4 496{
56b765b7 497 s64 toks;
1da177e4 498
87990467
SH
499 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
500 *diff = -toks;
501 return HTB_CANT_SEND;
502 }
18a63e86 503
87990467
SH
504 if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
505 return HTB_CAN_SEND;
1da177e4 506
87990467
SH
507 *diff = -toks;
508 return HTB_MAY_BORROW;
1da177e4
LT
509}
510
511/**
 512 * htb_change_class_mode - changes class's mode
513 *
 514 * This should be the only way to change a class's mode under normal
 515 * circumstances. Routine will update feed lists linkage, change mode
516 * and add class to the wait event queue if appropriate. New mode should
517 * be different from old one and cl->pq_key has to be valid if changing
518 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
519 */
87990467 520static void
56b765b7 521htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
87990467
SH
522{
523 enum htb_cmode new_mode = htb_class_mode(cl, diff);
1da177e4
LT
524
525 if (new_mode == cl->cmode)
87990467
SH
526 return;
527
528 if (cl->prio_activity) { /* not necessary: speed optimization */
529 if (cl->cmode != HTB_CANT_SEND)
530 htb_deactivate_prios(q, cl);
1da177e4 531 cl->cmode = new_mode;
87990467
SH
532 if (new_mode != HTB_CANT_SEND)
533 htb_activate_prios(q, cl);
534 } else
1da177e4
LT
535 cl->cmode = new_mode;
536}
537
538/**
10297b99 539 * htb_activate - inserts leaf cl into appropriate active feeds
1da177e4
LT
540 *
541 * Routine learns (new) priority of leaf and activates feed chain
542 * for the prio. It can be called on already active leaf safely.
543 * It also adds leaf into droplist.
544 */
87990467 545static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
1da177e4 546{
547b792c 547 WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
3bf72957 548
1da177e4 549 if (!cl->prio_activity) {
c19f7a34 550 cl->prio_activity = 1 << cl->prio;
87990467
SH
551 htb_activate_prios(q, cl);
552 list_add_tail(&cl->un.leaf.drop_list,
c19f7a34 553 q->drops + cl->prio);
1da177e4
LT
554 }
555}
556
557/**
10297b99 558 * htb_deactivate - remove leaf cl from active feeds
1da177e4
LT
559 *
 560 * Make sure that leaf is active. In other words it can't be called
561 * with non-active leaf. It also removes class from the drop list.
562 */
87990467 563static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
1da177e4 564{
547b792c 565 WARN_ON(!cl->prio_activity);
3bf72957 566
87990467 567 htb_deactivate_prios(q, cl);
1da177e4
LT
568 cl->prio_activity = 0;
569 list_del_init(&cl->un.leaf.drop_list);
570}
571
520ac30f
ED
572static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
573 struct sk_buff **to_free)
1da177e4 574{
f30ab418 575 int uninitialized_var(ret);
87990467
SH
576 struct htb_sched *q = qdisc_priv(sch);
577 struct htb_class *cl = htb_classify(skb, sch, &ret);
578
579 if (cl == HTB_DIRECT) {
580 /* enqueue to helper queue */
581 if (q->direct_queue.qlen < q->direct_qlen) {
582 __skb_queue_tail(&q->direct_queue, skb);
583 q->direct_pkts++;
584 } else {
520ac30f 585 return qdisc_drop(skb, sch, to_free);
87990467 586 }
1da177e4 587#ifdef CONFIG_NET_CLS_ACT
87990467 588 } else if (!cl) {
c27f339a 589 if (ret & __NET_XMIT_BYPASS)
25331d6c 590 qdisc_qstats_drop(sch);
520ac30f 591 __qdisc_drop(skb, to_free);
87990467 592 return ret;
1da177e4 593#endif
520ac30f
ED
594 } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q,
595 to_free)) != NET_XMIT_SUCCESS) {
378a2f09 596 if (net_xmit_drop_count(ret)) {
25331d6c 597 qdisc_qstats_drop(sch);
378a2f09
JP
598 cl->qstats.drops++;
599 }
69747650 600 return ret;
87990467 601 } else {
87990467
SH
602 htb_activate(q, cl);
603 }
604
431e3a8e 605 qdisc_qstats_backlog_inc(sch, skb);
87990467 606 sch->q.qlen++;
87990467 607 return NET_XMIT_SUCCESS;
1da177e4
LT
608}
609
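/* Refill the rate bucket with the elapsed-time credit "diff", charge it for
 * "bytes" at the class rate, and keep the result between 1 - mbuffer and
 * buffer. htb_accnt_ctokens() below does the same for the ceil bucket.
 */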
56b765b7 610static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
59e4220a 611{
56b765b7 612 s64 toks = diff + cl->tokens;
59e4220a
JP
613
614 if (toks > cl->buffer)
615 toks = cl->buffer;
292f1c7f 616 toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
59e4220a
JP
617 if (toks <= -cl->mbuffer)
618 toks = 1 - cl->mbuffer;
619
620 cl->tokens = toks;
621}
622
56b765b7 623static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
59e4220a 624{
56b765b7 625 s64 toks = diff + cl->ctokens;
59e4220a
JP
626
627 if (toks > cl->cbuffer)
628 toks = cl->cbuffer;
292f1c7f 629 toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
59e4220a
JP
630 if (toks <= -cl->mbuffer)
631 toks = 1 - cl->mbuffer;
632
633 cl->ctokens = toks;
634}
635
1da177e4
LT
636/**
637 * htb_charge_class - charges amount "bytes" to leaf and ancestors
638 *
639 * Routine assumes that packet "bytes" long was dequeued from leaf cl
640 * borrowing from "level". It accounts bytes to ceil leaky bucket for
641 * leaf and all ancestors and to rate bucket for ancestors at levels
642 * "level" and higher. It also handles possible change of mode resulting
643 * from the update. Note that mode can also increase here (MAY_BORROW to
644 * CAN_SEND) because we can use more precise clock that event queue here.
645 * In such case we remove class from event queue first.
646 */
87990467 647static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
c9726d68 648 int level, struct sk_buff *skb)
87990467 649{
0abf77e5 650 int bytes = qdisc_pkt_len(skb);
1da177e4 651 enum htb_cmode old_mode;
56b765b7 652 s64 diff;
1da177e4
LT
653
654 while (cl) {
56b765b7 655 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
1da177e4 656 if (cl->level >= level) {
87990467
SH
657 if (cl->level == level)
658 cl->xstats.lends++;
59e4220a 659 htb_accnt_tokens(cl, bytes, diff);
1da177e4
LT
660 } else {
661 cl->xstats.borrows++;
87990467 662 cl->tokens += diff; /* we moved t_c; update tokens */
1da177e4 663 }
59e4220a 664 htb_accnt_ctokens(cl, bytes, diff);
1da177e4 665 cl->t_c = q->now;
1da177e4 666
87990467
SH
667 old_mode = cl->cmode;
668 diff = 0;
669 htb_change_class_mode(q, cl, &diff);
1da177e4
LT
670 if (old_mode != cl->cmode) {
671 if (old_mode != HTB_CAN_SEND)
c9364636 672 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
1da177e4 673 if (cl->cmode != HTB_CAN_SEND)
87990467 674 htb_add_to_wait_tree(q, cl, diff);
1da177e4 675 }
1da177e4 676
bfe0d029
ED
677 /* update basic stats except for leaves which are already updated */
678 if (cl->level)
679 bstats_update(&cl->bstats, skb);
680
1da177e4
LT
681 cl = cl->parent;
682 }
683}
684
685/**
686 * htb_do_events - make mode changes to classes at the level
687 *
fb983d45 688 * Scans event queue for pending events and applies them. Returns time of
1224736d 689 * next pending event (0 for no event in pq, q->now for too many events).
fb983d45 690 * Note: Applied are events that have cl->pq_key <= q->now.
1da177e4 691 */
c9364636 692static s64 htb_do_events(struct htb_sched *q, const int level,
5343a7f8 693 unsigned long start)
1da177e4 694{
8f3ea33a 695 /* don't run for longer than 2 jiffies; 2 is used instead of
cc7ec456
ED
696 * 1 to simplify things when jiffy is going to be incremented
697 * too soon
698 */
a73be040 699 unsigned long stop_at = start + 2;
c9364636
ED
700 struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
701
8f3ea33a 702 while (time_before(jiffies, stop_at)) {
1da177e4 703 struct htb_class *cl;
56b765b7 704 s64 diff;
c9364636 705 struct rb_node *p = rb_first(wait_pq);
30bdbe39 706
87990467
SH
707 if (!p)
708 return 0;
1da177e4
LT
709
710 cl = rb_entry(p, struct htb_class, pq_node);
fb983d45
PM
711 if (cl->pq_key > q->now)
712 return cl->pq_key;
713
c9364636 714 htb_safe_rb_erase(p, wait_pq);
56b765b7 715 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
87990467 716 htb_change_class_mode(q, cl, &diff);
1da177e4 717 if (cl->cmode != HTB_CAN_SEND)
87990467 718 htb_add_to_wait_tree(q, cl, diff);
1da177e4 719 }
1224736d
JP
720
721 /* too much load - let's continue after a break for scheduling */
e82181de 722 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
c17988a9 723 pr_warn("htb: too many events!\n");
e82181de
JP
724 q->warned |= HTB_WARN_TOOMANYEVENTS;
725 }
1224736d
JP
726
727 return q->now;
1da177e4
LT
728}
729
 730/* Returns class->node+prio from id-tree where class's id is >= id. NULL
cc7ec456
ED
 731 * if no such one exists.
732 */
87990467
SH
733static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
734 u32 id)
1da177e4
LT
735{
736 struct rb_node *r = NULL;
737 while (n) {
87990467
SH
738 struct htb_class *cl =
739 rb_entry(n, struct htb_class, node[prio]);
87990467 740
f4c1f3e0 741 if (id > cl->common.classid) {
1da177e4 742 n = n->rb_right;
1b5c0077 743 } else if (id < cl->common.classid) {
1da177e4
LT
744 r = n;
745 n = n->rb_left;
1b5c0077
JP
746 } else {
747 return n;
1da177e4
LT
748 }
749 }
750 return r;
751}
752
753/**
754 * htb_lookup_leaf - returns next leaf class in DRR order
755 *
 756 * Find the leaf where the current feed pointer points.
757 */
c9364636 758static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
1da177e4
LT
759{
760 int i;
761 struct {
762 struct rb_node *root;
763 struct rb_node **pptr;
764 u32 *pid;
87990467
SH
765 } stk[TC_HTB_MAXDEPTH], *sp = stk;
766
c9364636
ED
767 BUG_ON(!hprio->row.rb_node);
768 sp->root = hprio->row.rb_node;
769 sp->pptr = &hprio->ptr;
770 sp->pid = &hprio->last_ptr_id;
1da177e4
LT
771
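 /* Walk down from the row root through the per-prio feed trees; the loop
  * bound is only a safety net, a leaf is normally found and returned from
  * inside the loop.
  */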
772 for (i = 0; i < 65535; i++) {
87990467 773 if (!*sp->pptr && *sp->pid) {
10297b99 774 /* ptr was invalidated but id is valid - try to recover
cc7ec456
ED
775 * the original or next ptr
776 */
87990467
SH
777 *sp->pptr =
778 htb_id_find_next_upper(prio, sp->root, *sp->pid);
1da177e4 779 }
87990467 780 *sp->pid = 0; /* ptr is valid now so remove this hint as it
cc7ec456
ED
781 * can become out of date quickly
782 */
87990467 783 if (!*sp->pptr) { /* we are at right end; rewind & go up */
1da177e4 784 *sp->pptr = sp->root;
87990467 785 while ((*sp->pptr)->rb_left)
1da177e4
LT
786 *sp->pptr = (*sp->pptr)->rb_left;
787 if (sp > stk) {
788 sp--;
512bb43e
JP
789 if (!*sp->pptr) {
790 WARN_ON(1);
87990467 791 return NULL;
512bb43e 792 }
87990467 793 htb_next_rb_node(sp->pptr);
1da177e4
LT
794 }
795 } else {
796 struct htb_class *cl;
c9364636
ED
797 struct htb_prio *clp;
798
87990467
SH
799 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
800 if (!cl->level)
1da177e4 801 return cl;
c9364636
ED
802 clp = &cl->un.inner.clprio[prio];
803 (++sp)->root = clp->feed.rb_node;
804 sp->pptr = &clp->ptr;
805 sp->pid = &clp->last_ptr_id;
1da177e4
LT
806 }
807 }
547b792c 808 WARN_ON(1);
1da177e4
LT
809 return NULL;
810}
811
812/* dequeues packet at given priority and level; call only if
cc7ec456
ED
 813 * you are sure that there is an active class at prio/level
814 */
c9364636
ED
815static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
816 const int level)
1da177e4
LT
817{
818 struct sk_buff *skb = NULL;
87990467 819 struct htb_class *cl, *start;
c9364636
ED
820 struct htb_level *hlevel = &q->hlevel[level];
821 struct htb_prio *hprio = &hlevel->hprio[prio];
822
1da177e4 823 /* look initial class up in the row */
c9364636 824 start = cl = htb_lookup_leaf(hprio, prio);
87990467 825
1da177e4
LT
826 do {
827next:
512bb43e 828 if (unlikely(!cl))
87990467 829 return NULL;
1da177e4
LT
830
831 /* class can be empty - it is unlikely but can be true if leaf
cc7ec456
ED
832 * qdisc drops packets in enqueue routine or if someone used
833 * graft operation on the leaf since last dequeue;
834 * simply deactivate and skip such class
835 */
1da177e4
LT
836 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
837 struct htb_class *next;
87990467 838 htb_deactivate(q, cl);
1da177e4
LT
839
840 /* row/level might become empty */
841 if ((q->row_mask[level] & (1 << prio)) == 0)
87990467 842 return NULL;
1da177e4 843
c9364636 844 next = htb_lookup_leaf(hprio, prio);
87990467
SH
845
846 if (cl == start) /* fix start if we just deleted it */
1da177e4
LT
847 start = next;
848 cl = next;
849 goto next;
850 }
87990467
SH
851
852 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
853 if (likely(skb != NULL))
1da177e4 854 break;
633fe66e 855
b00355db 856 qdisc_warn_nonwc("htb", cl->un.leaf.q);
c9364636
ED
857 htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr:
858 &q->hlevel[0].hprio[prio].ptr);
859 cl = htb_lookup_leaf(hprio, prio);
1da177e4
LT
860
861 } while (cl != start);
862
863 if (likely(skb != NULL)) {
196d97f6 864 bstats_update(&cl->bstats, skb);
0abf77e5
JK
865 cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
866 if (cl->un.leaf.deficit[level] < 0) {
c19f7a34 867 cl->un.leaf.deficit[level] += cl->quantum;
c9364636
ED
868 htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr :
869 &q->hlevel[0].hprio[prio].ptr);
1da177e4
LT
870 }
 871 /* this used to be after charge_class but this constellation
cc7ec456
ED
872 * gives us slightly better performance
873 */
1da177e4 874 if (!cl->un.leaf.q->q.qlen)
87990467 875 htb_deactivate(q, cl);
c9726d68 876 htb_charge_class(q, cl, level, skb);
1da177e4
LT
877 }
878 return skb;
879}
880
1da177e4
LT
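/* Main dequeue: serve the direct queue first, then walk levels 0..MAXDEPTH-1
 * and, within each level, priorities in ascending order (0 is served first),
 * processing pending mode-change events for each level along the way.
 */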
881static struct sk_buff *htb_dequeue(struct Qdisc *sch)
882{
9190b3b3 883 struct sk_buff *skb;
1da177e4
LT
884 struct htb_sched *q = qdisc_priv(sch);
885 int level;
5343a7f8 886 s64 next_event;
a73be040 887 unsigned long start_at;
1da177e4
LT
888
889 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
87990467
SH
890 skb = __skb_dequeue(&q->direct_queue);
891 if (skb != NULL) {
9190b3b3
ED
892ok:
893 qdisc_bstats_update(sch, skb);
431e3a8e 894 qdisc_qstats_backlog_dec(sch, skb);
1da177e4
LT
895 sch->q.qlen--;
896 return skb;
897 }
898
87990467
SH
899 if (!sch->q.qlen)
900 goto fin;
d2de875c 901 q->now = ktime_get_ns();
a73be040 902 start_at = jiffies;
1da177e4 903
d2fe85da 904 next_event = q->now + 5LLU * NSEC_PER_SEC;
633fe66e 905
1da177e4
LT
906 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
907 /* common case optimization - skip event handler quickly */
908 int m;
c9364636 909 s64 event = q->near_ev_cache[level];
fb983d45 910
c9364636 911 if (q->now >= event) {
a73be040 912 event = htb_do_events(q, level, start_at);
2e4b3b0e 913 if (!event)
56b765b7 914 event = q->now + NSEC_PER_SEC;
2e4b3b0e 915 q->near_ev_cache[level] = event;
c9364636 916 }
fb983d45 917
c0851347 918 if (next_event > event)
fb983d45 919 next_event = event;
87990467 920
1da177e4
LT
921 m = ~q->row_mask[level];
922 while (m != (int)(-1)) {
87990467 923 int prio = ffz(m);
cc7ec456 924
1da177e4 925 m |= 1 << prio;
87990467 926 skb = htb_dequeue_tree(q, prio, level);
9190b3b3
ED
927 if (likely(skb != NULL))
928 goto ok;
1da177e4
LT
929 }
930 }
25331d6c 931 qdisc_qstats_overlimit(sch);
a9efad8b 932 if (likely(next_event > q->now))
45f50bed 933 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
a9efad8b 934 else
1224736d 935 schedule_work(&q->work);
1da177e4 936fin:
1da177e4
LT
937 return skb;
938}
939
1da177e4
LT
940/* reset all classes */
 941/* always called under BH & queue lock */
87990467 942static void htb_reset(struct Qdisc *sch)
1da177e4
LT
943{
944 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 945 struct htb_class *cl;
f4c1f3e0 946 unsigned int i;
0cef296d 947
f4c1f3e0 948 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 949 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1da177e4 950 if (cl->level)
87990467 951 memset(&cl->un.inner, 0, sizeof(cl->un.inner));
1da177e4 952 else {
87990467 953 if (cl->un.leaf.q)
1da177e4
LT
954 qdisc_reset(cl->un.leaf.q);
955 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
956 }
957 cl->prio_activity = 0;
958 cl->cmode = HTB_CAN_SEND;
1da177e4
LT
959 }
960 }
fb983d45 961 qdisc_watchdog_cancel(&q->watchdog);
a5a9f534 962 __qdisc_reset_queue(&q->direct_queue);
1da177e4 963 sch->q.qlen = 0;
431e3a8e 964 sch->qstats.backlog = 0;
c9364636 965 memset(q->hlevel, 0, sizeof(q->hlevel));
87990467 966 memset(q->row_mask, 0, sizeof(q->row_mask));
1da177e4 967 for (i = 0; i < TC_HTB_NUMPRIO; i++)
87990467 968 INIT_LIST_HEAD(q->drops + i);
1da177e4
LT
969}
970
27a3421e
PM
971static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
972 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
973 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
974 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
975 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
6906f4ed 976 [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
df62cdf3
ED
977 [TCA_HTB_RATE64] = { .type = NLA_U64 },
978 [TCA_HTB_CEIL64] = { .type = NLA_U64 },
27a3421e
PM
979};
980
1224736d
JP
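/* Deferred kick used by htb_dequeue() when htb_do_events() ran out of its
 * 2-jiffy budget: simply reschedule the root qdisc so dequeue runs again.
 */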
981static void htb_work_func(struct work_struct *work)
982{
983 struct htb_sched *q = container_of(work, struct htb_sched, work);
984 struct Qdisc *sch = q->watchdog.qdisc;
985
986 __netif_schedule(qdisc_root(sch));
987}
988
1e90474c 989static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1da177e4
LT
990{
991 struct htb_sched *q = qdisc_priv(sch);
6906f4ed 992 struct nlattr *tb[TCA_HTB_MAX + 1];
1da177e4 993 struct tc_htb_glob *gopt;
cee63723 994 int err;
1da177e4 995 int i;
cee63723
PM
996
997 if (!opt)
998 return -EINVAL;
999
6906f4ed 1000 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
cee63723
PM
1001 if (err < 0)
1002 return err;
1003
6906f4ed 1004 if (!tb[TCA_HTB_INIT])
1da177e4 1005 return -EINVAL;
6906f4ed 1006
1e90474c 1007 gopt = nla_data(tb[TCA_HTB_INIT]);
6906f4ed 1008 if (gopt->version != HTB_VER >> 16)
1da177e4 1009 return -EINVAL;
1da177e4 1010
f4c1f3e0
PM
1011 err = qdisc_class_hash_init(&q->clhash);
1012 if (err < 0)
1013 return err;
1da177e4 1014 for (i = 0; i < TC_HTB_NUMPRIO; i++)
87990467 1015 INIT_LIST_HEAD(q->drops + i);
1da177e4 1016
fb983d45 1017 qdisc_watchdog_init(&q->watchdog, sch);
1224736d 1018 INIT_WORK(&q->work, htb_work_func);
ab34f648 1019 __skb_queue_head_init(&q->direct_queue);
1da177e4 1020
6906f4ed
ED
1021 if (tb[TCA_HTB_DIRECT_QLEN])
1022 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
348e3435 1023 else
6906f4ed 1024 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
348e3435 1025
1da177e4
LT
1026 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1027 q->rate2quantum = 1;
1028 q->defcls = gopt->defcls;
1029
1030 return 0;
1031}
1032
1033static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1034{
1035 struct htb_sched *q = qdisc_priv(sch);
4b3550ef 1036 struct nlattr *nest;
1da177e4 1037 struct tc_htb_glob gopt;
4b3550ef 1038
6f542efc
ED
 1039 /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1040 * no change can happen on the qdisc parameters.
1041 */
1da177e4 1042
4b3550ef 1043 gopt.direct_pkts = q->direct_pkts;
1da177e4
LT
1044 gopt.version = HTB_VER;
1045 gopt.rate2quantum = q->rate2quantum;
1046 gopt.defcls = q->defcls;
3bf72957 1047 gopt.debug = 0;
4b3550ef
PM
1048
1049 nest = nla_nest_start(skb, TCA_OPTIONS);
1050 if (nest == NULL)
1051 goto nla_put_failure;
6906f4ed
ED
1052 if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1053 nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1b34ec43 1054 goto nla_put_failure;
4b3550ef 1055
6f542efc 1056 return nla_nest_end(skb, nest);
4b3550ef 1057
1e90474c 1058nla_put_failure:
4b3550ef 1059 nla_nest_cancel(skb, nest);
1da177e4
LT
1060 return -1;
1061}
1062
1063static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
87990467 1064 struct sk_buff *skb, struct tcmsg *tcm)
1da177e4 1065{
87990467 1066 struct htb_class *cl = (struct htb_class *)arg;
4b3550ef 1067 struct nlattr *nest;
1da177e4
LT
1068 struct tc_htb_opt opt;
1069
6f542efc
ED
 1070 /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1071 * no change can happen on the class parameters.
1072 */
f4c1f3e0
PM
1073 tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1074 tcm->tcm_handle = cl->common.classid;
1da177e4
LT
1075 if (!cl->level && cl->un.leaf.q)
1076 tcm->tcm_info = cl->un.leaf.q->handle;
1077
4b3550ef
PM
1078 nest = nla_nest_start(skb, TCA_OPTIONS);
1079 if (nest == NULL)
1080 goto nla_put_failure;
1da177e4 1081
87990467 1082 memset(&opt, 0, sizeof(opt));
1da177e4 1083
01cb71d2 1084 psched_ratecfg_getrate(&opt.rate, &cl->rate);
9c10f411 1085 opt.buffer = PSCHED_NS2TICKS(cl->buffer);
01cb71d2 1086 psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
9c10f411 1087 opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
c19f7a34
JP
1088 opt.quantum = cl->quantum;
1089 opt.prio = cl->prio;
87990467 1090 opt.level = cl->level;
1b34ec43
DM
1091 if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1092 goto nla_put_failure;
df62cdf3 1093 if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
2a51c1e8
ND
1094 nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1095 TCA_HTB_PAD))
df62cdf3
ED
1096 goto nla_put_failure;
1097 if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
2a51c1e8
ND
1098 nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1099 TCA_HTB_PAD))
df62cdf3 1100 goto nla_put_failure;
4b3550ef 1101
6f542efc 1102 return nla_nest_end(skb, nest);
4b3550ef 1103
1e90474c 1104nla_put_failure:
4b3550ef 1105 nla_nest_cancel(skb, nest);
1da177e4
LT
1106 return -1;
1107}
1108
1109static int
87990467 1110htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1da177e4 1111{
87990467 1112 struct htb_class *cl = (struct htb_class *)arg;
64015853 1113 __u32 qlen = 0;
1da177e4 1114
1da177e4 1115 if (!cl->level && cl->un.leaf.q)
64015853 1116 qlen = cl->un.leaf.q->q.qlen;
5343a7f8
ED
1117 cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
1118 cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
1da177e4 1119
edb09eb1
ED
1120 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1121 d, NULL, &cl->bstats) < 0 ||
d250a5f9 1122 gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
b0ab6f92 1123 gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
1da177e4
LT
1124 return -1;
1125
1126 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1127}
1128
1129static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
87990467 1130 struct Qdisc **old)
1da177e4 1131{
87990467 1132 struct htb_class *cl = (struct htb_class *)arg;
1da177e4 1133
5b9a9ccf
PM
1134 if (cl->level)
1135 return -EINVAL;
1136 if (new == NULL &&
3511c913 1137 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
5b9a9ccf
PM
1138 cl->common.classid)) == NULL)
1139 return -ENOBUFS;
1140
86a7996c 1141 *old = qdisc_replace(sch, new, &cl->un.leaf.q);
5b9a9ccf 1142 return 0;
1da177e4
LT
1143}
1144
87990467 1145static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1da177e4 1146{
87990467 1147 struct htb_class *cl = (struct htb_class *)arg;
5b9a9ccf 1148 return !cl->level ? cl->un.leaf.q : NULL;
1da177e4
LT
1149}
1150
256d61b8
PM
1151static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1152{
1153 struct htb_class *cl = (struct htb_class *)arg;
1154
1155 if (cl->un.leaf.q->q.qlen == 0)
1156 htb_deactivate(qdisc_priv(sch), cl);
1157}
1158
1da177e4
LT
1159static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1160{
87990467
SH
1161 struct htb_class *cl = htb_find(classid, sch);
1162 if (cl)
1da177e4
LT
1163 cl->refcnt++;
1164 return (unsigned long)cl;
1165}
1166
160d5e10
JP
1167static inline int htb_parent_last_child(struct htb_class *cl)
1168{
1169 if (!cl->parent)
1170 /* the root class */
1171 return 0;
42077599 1172 if (cl->parent->children > 1)
160d5e10
JP
1173 /* not the last child */
1174 return 0;
160d5e10
JP
1175 return 1;
1176}
1177
3ba08b00
JP
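/* Turn an inner class whose last child was deleted back into a leaf:
 * drop it from the wait queue if needed, attach new_q (or noop_qdisc),
 * refill both buckets and reset it to HTB_CAN_SEND.
 */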
1178static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1179 struct Qdisc *new_q)
160d5e10
JP
1180{
1181 struct htb_class *parent = cl->parent;
1182
547b792c 1183 WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
160d5e10 1184
3ba08b00 1185 if (parent->cmode != HTB_CAN_SEND)
c9364636
ED
1186 htb_safe_rb_erase(&parent->pq_node,
1187 &q->hlevel[parent->level].wait_pq);
3ba08b00 1188
160d5e10
JP
1189 parent->level = 0;
1190 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1191 INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1192 parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
160d5e10
JP
1193 parent->tokens = parent->buffer;
1194 parent->ctokens = parent->cbuffer;
d2de875c 1195 parent->t_c = ktime_get_ns();
160d5e10
JP
1196 parent->cmode = HTB_CAN_SEND;
1197}
1198
87990467 1199static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1da177e4 1200{
1da177e4 1201 if (!cl->level) {
547b792c 1202 WARN_ON(!cl->un.leaf.q);
1da177e4
LT
1203 qdisc_destroy(cl->un.leaf.q);
1204 }
ee39e10c 1205 gen_kill_estimator(&cl->bstats, &cl->rate_est);
ff31ab56 1206 tcf_destroy_chain(&cl->filter_list);
1da177e4
LT
1207 kfree(cl);
1208}
1209
87990467 1210static void htb_destroy(struct Qdisc *sch)
1da177e4
LT
1211{
1212 struct htb_sched *q = qdisc_priv(sch);
b67bfe0d 1213 struct hlist_node *next;
fbd8f137
PM
1214 struct htb_class *cl;
1215 unsigned int i;
1da177e4 1216
1224736d 1217 cancel_work_sync(&q->work);
fb983d45 1218 qdisc_watchdog_cancel(&q->watchdog);
1da177e4 1219 /* This line used to be after htb_destroy_class call below
cc7ec456
ED
1220 * and surprisingly it worked in 2.4. But it must precede it
 1221 * because filters need their target class alive to be able to call
1222 * unbind_filter on it (without Oops).
1223 */
ff31ab56 1224 tcf_destroy_chain(&q->filter_list);
87990467 1225
f4c1f3e0 1226 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 1227 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
fbd8f137
PM
1228 tcf_destroy_chain(&cl->filter_list);
1229 }
f4c1f3e0 1230 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 1231 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
f4c1f3e0 1232 common.hnode)
fbd8f137
PM
1233 htb_destroy_class(sch, cl);
1234 }
f4c1f3e0 1235 qdisc_class_hash_destroy(&q->clhash);
a5a9f534 1236 __qdisc_reset_queue(&q->direct_queue);
1da177e4
LT
1237}
1238
1239static int htb_delete(struct Qdisc *sch, unsigned long arg)
1240{
1241 struct htb_sched *q = qdisc_priv(sch);
87990467 1242 struct htb_class *cl = (struct htb_class *)arg;
160d5e10
JP
1243 struct Qdisc *new_q = NULL;
1244 int last_child = 0;
1da177e4 1245
a071d272
YY
 1246 /* TODO: why don't we allow deleting a subtree ? references ? does
1247 * tc subsys guarantee us that in htb_destroy it holds no class
1248 * refs so that we can remove children safely there ?
1249 */
42077599 1250 if (cl->children || cl->filter_cnt)
1da177e4 1251 return -EBUSY;
87990467 1252
160d5e10 1253 if (!cl->level && htb_parent_last_child(cl)) {
3511c913 1254 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
bb949fbd 1255 cl->parent->common.classid);
160d5e10
JP
1256 last_child = 1;
1257 }
1258
1da177e4 1259 sch_tree_lock(sch);
87990467 1260
814a175e 1261 if (!cl->level) {
2ccccf5f
WC
1262 unsigned int qlen = cl->un.leaf.q->q.qlen;
1263 unsigned int backlog = cl->un.leaf.q->qstats.backlog;
1264
814a175e 1265 qdisc_reset(cl->un.leaf.q);
2ccccf5f 1266 qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
814a175e
PM
1267 }
1268
f4c1f3e0
PM
1269 /* delete from hash and active; remainder in destroy_class */
1270 qdisc_class_hash_remove(&q->clhash, &cl->common);
26b284de
JP
1271 if (cl->parent)
1272 cl->parent->children--;
c38c83cb 1273
1da177e4 1274 if (cl->prio_activity)
87990467 1275 htb_deactivate(q, cl);
1da177e4 1276
fbd8f137 1277 if (cl->cmode != HTB_CAN_SEND)
c9364636
ED
1278 htb_safe_rb_erase(&cl->pq_node,
1279 &q->hlevel[cl->level].wait_pq);
fbd8f137 1280
160d5e10 1281 if (last_child)
3ba08b00 1282 htb_parent_to_leaf(q, cl, new_q);
160d5e10 1283
7cd0a638
JP
1284 BUG_ON(--cl->refcnt == 0);
1285 /*
1286 * This shouldn't happen: we "hold" one cops->get() when called
1287 * from tc_ctl_tclass; the destroy method is done from cops->put().
1288 */
1da177e4
LT
1289
1290 sch_tree_unlock(sch);
1291 return 0;
1292}
1293
1294static void htb_put(struct Qdisc *sch, unsigned long arg)
1295{
87990467 1296 struct htb_class *cl = (struct htb_class *)arg;
1da177e4
LT
1297
1298 if (--cl->refcnt == 0)
87990467 1299 htb_destroy_class(sch, cl);
1da177e4
LT
1300}
1301
87990467 1302static int htb_change_class(struct Qdisc *sch, u32 classid,
1e90474c 1303 u32 parentid, struct nlattr **tca,
87990467 1304 unsigned long *arg)
1da177e4
LT
1305{
1306 int err = -EINVAL;
1307 struct htb_sched *q = qdisc_priv(sch);
87990467 1308 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1e90474c 1309 struct nlattr *opt = tca[TCA_OPTIONS];
6906f4ed 1310 struct nlattr *tb[TCA_HTB_MAX + 1];
1da177e4 1311 struct tc_htb_opt *hopt;
df62cdf3 1312 u64 rate64, ceil64;
1da177e4
LT
1313
1314 /* extract all subattrs from opt attr */
cee63723
PM
1315 if (!opt)
1316 goto failure;
1317
e18434c4 1318 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
cee63723
PM
1319 if (err < 0)
1320 goto failure;
1321
1322 err = -EINVAL;
27a3421e 1323 if (tb[TCA_HTB_PARMS] == NULL)
1da177e4 1324 goto failure;
1da177e4 1325
87990467
SH
1326 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1327
1e90474c 1328 hopt = nla_data(tb[TCA_HTB_PARMS]);
196d97f6 1329 if (!hopt->rate.rate || !hopt->ceil.rate)
87990467 1330 goto failure;
1da177e4 1331
8a8e3d84 1332 /* Keeping backward compatibility with rate_table based iproute2 tc */
6b1dd856
YY
1333 if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1334 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]));
1335
1336 if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1337 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]));
8a8e3d84 1338
87990467 1339 if (!cl) { /* new class */
1da177e4 1340 struct Qdisc *new_q;
3696f625 1341 int prio;
ee39e10c 1342 struct {
1e90474c 1343 struct nlattr nla;
ee39e10c
PM
1344 struct gnet_estimator opt;
1345 } est = {
1e90474c
PM
1346 .nla = {
1347 .nla_len = nla_attr_size(sizeof(est.opt)),
1348 .nla_type = TCA_RATE,
ee39e10c
PM
1349 },
1350 .opt = {
1351 /* 4s interval, 16s averaging constant */
1352 .interval = 2,
1353 .ewma_log = 2,
1354 },
1355 };
3696f625 1356
1da177e4 1357 /* check for valid classid */
f64f9e71
JP
1358 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1359 htb_find(classid, sch))
1da177e4
LT
1360 goto failure;
1361
1362 /* check maximal depth */
1363 if (parent && parent->parent && parent->parent->level < 2) {
cc7ec456 1364 pr_err("htb: tree is too deep\n");
1da177e4
LT
1365 goto failure;
1366 }
1367 err = -ENOBUFS;
cc7ec456
ED
1368 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1369 if (!cl)
1da177e4 1370 goto failure;
87990467 1371
64153ce0 1372 if (htb_rate_est || tca[TCA_RATE]) {
22e0f8b9
JF
1373 err = gen_new_estimator(&cl->bstats, NULL,
1374 &cl->rate_est,
edb09eb1
ED
1375 NULL,
1376 qdisc_root_sleeping_running(sch),
64153ce0
ED
1377 tca[TCA_RATE] ? : &est.nla);
1378 if (err) {
1379 kfree(cl);
1380 goto failure;
1381 }
71bcb09a
SH
1382 }
1383
1da177e4 1384 cl->refcnt = 1;
42077599 1385 cl->children = 0;
1da177e4 1386 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
3696f625
SH
1387 RB_CLEAR_NODE(&cl->pq_node);
1388
1389 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1390 RB_CLEAR_NODE(&cl->node[prio]);
1da177e4
LT
1391
1392 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
cc7ec456
ED
 1393 * so it can't be used inside of sch_tree_lock
1394 * -- thanks to Karlis Peisenieks
1395 */
3511c913 1396 new_q = qdisc_create_dflt(sch->dev_queue,
bb949fbd 1397 &pfifo_qdisc_ops, classid);
1da177e4
LT
1398 sch_tree_lock(sch);
1399 if (parent && !parent->level) {
256d61b8 1400 unsigned int qlen = parent->un.leaf.q->q.qlen;
2ccccf5f 1401 unsigned int backlog = parent->un.leaf.q->qstats.backlog;
256d61b8 1402
1da177e4 1403 /* turn parent into inner node */
256d61b8 1404 qdisc_reset(parent->un.leaf.q);
2ccccf5f 1405 qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
87990467
SH
1406 qdisc_destroy(parent->un.leaf.q);
1407 if (parent->prio_activity)
1408 htb_deactivate(q, parent);
1da177e4
LT
1409
1410 /* remove from evt list because of level change */
1411 if (parent->cmode != HTB_CAN_SEND) {
c9364636 1412 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1da177e4
LT
1413 parent->cmode = HTB_CAN_SEND;
1414 }
1415 parent->level = (parent->parent ? parent->parent->level
87990467
SH
1416 : TC_HTB_MAXDEPTH) - 1;
1417 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1da177e4
LT
1418 }
1419 /* leaf (we) needs elementary qdisc */
1420 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1421
f4c1f3e0 1422 cl->common.classid = classid;
87990467 1423 cl->parent = parent;
1da177e4
LT
1424
1425 /* set class to be in HTB_CAN_SEND state */
b9a7afde
JP
1426 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1427 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
5343a7f8 1428 cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */
d2de875c 1429 cl->t_c = ktime_get_ns();
1da177e4
LT
1430 cl->cmode = HTB_CAN_SEND;
1431
1432 /* attach to the hash list and parent's family */
f4c1f3e0 1433 qdisc_class_hash_insert(&q->clhash, &cl->common);
42077599
PM
1434 if (parent)
1435 parent->children++;
ee39e10c 1436 } else {
71bcb09a 1437 if (tca[TCA_RATE]) {
22e0f8b9
JF
1438 err = gen_replace_estimator(&cl->bstats, NULL,
1439 &cl->rate_est,
edb09eb1
ED
1440 NULL,
1441 qdisc_root_sleeping_running(sch),
71bcb09a
SH
1442 tca[TCA_RATE]);
1443 if (err)
1444 return err;
1445 }
87990467 1446 sch_tree_lock(sch);
ee39e10c 1447 }
1da177e4 1448
1598f7cb
YY
1449 rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1450
1451 ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1452
1453 psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1454 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1455
1da177e4 1456 /* there used to be a nasty bug here, we have to check that the node
cc7ec456
ED
 1457 * is really a leaf before changing cl->un.leaf !
1458 */
1da177e4 1459 if (!cl->level) {
1598f7cb
YY
1460 u64 quantum = cl->rate.rate_bytes_ps;
1461
1462 do_div(quantum, q->rate2quantum);
1463 cl->quantum = min_t(u64, quantum, INT_MAX);
1464
c19f7a34 1465 if (!hopt->quantum && cl->quantum < 1000) {
c17988a9
YY
1466 pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n",
1467 cl->common.classid);
c19f7a34 1468 cl->quantum = 1000;
1da177e4 1469 }
c19f7a34 1470 if (!hopt->quantum && cl->quantum > 200000) {
c17988a9
YY
1471 pr_warn("HTB: quantum of class %X is big. Consider r2q change.\n",
1472 cl->common.classid);
c19f7a34 1473 cl->quantum = 200000;
1da177e4
LT
1474 }
1475 if (hopt->quantum)
c19f7a34
JP
1476 cl->quantum = hopt->quantum;
1477 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1478 cl->prio = TC_HTB_NUMPRIO - 1;
1da177e4
LT
1479 }
1480
324f5aa5 1481 cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
f3ad857e 1482 cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
56b765b7 1483
1da177e4
LT
1484 sch_tree_unlock(sch);
1485
f4c1f3e0
PM
1486 qdisc_class_hash_grow(sch, &q->clhash);
1487
1da177e4
LT
1488 *arg = (unsigned long)cl;
1489 return 0;
1490
1491failure:
1da177e4
LT
1492 return err;
1493}
1494
25d8c0d5
JF
1495static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch,
1496 unsigned long arg)
1da177e4
LT
1497{
1498 struct htb_sched *q = qdisc_priv(sch);
1499 struct htb_class *cl = (struct htb_class *)arg;
25d8c0d5 1500 struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list;
3bf72957 1501
1da177e4
LT
1502 return fl;
1503}
1504
1505static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
87990467 1506 u32 classid)
1da177e4 1507{
87990467 1508 struct htb_class *cl = htb_find(classid, sch);
3bf72957 1509
1da177e4 1510 /*if (cl && !cl->level) return 0;
cc7ec456
ED
1511 * The line above used to be there to prevent attaching filters to
1512 * leaves. But at least tc_index filter uses this just to get class
 1513 * for other reasons, so we have to allow it.
1514 * ----
1515 * 19.6.2002 As Werner explained it is ok - bind filter is just
1516 * another way to "lock" the class - unlike "get" this lock can
1517 * be broken by class during destroy IIUC.
1da177e4 1518 */
87990467
SH
1519 if (cl)
1520 cl->filter_cnt++;
1da177e4
LT
1521 return (unsigned long)cl;
1522}
1523
1524static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1525{
1da177e4 1526 struct htb_class *cl = (struct htb_class *)arg;
3bf72957 1527
87990467
SH
1528 if (cl)
1529 cl->filter_cnt--;
1da177e4
LT
1530}
1531
1532static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1533{
1534 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 1535 struct htb_class *cl;
f4c1f3e0 1536 unsigned int i;
1da177e4
LT
1537
1538 if (arg->stop)
1539 return;
1540
f4c1f3e0 1541 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 1542 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1da177e4
LT
1543 if (arg->count < arg->skip) {
1544 arg->count++;
1545 continue;
1546 }
1547 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1548 arg->stop = 1;
1549 return;
1550 }
1551 arg->count++;
1552 }
1553 }
1554}
1555
20fea08b 1556static const struct Qdisc_class_ops htb_class_ops = {
1da177e4
LT
1557 .graft = htb_graft,
1558 .leaf = htb_leaf,
256d61b8 1559 .qlen_notify = htb_qlen_notify,
1da177e4
LT
1560 .get = htb_get,
1561 .put = htb_put,
1562 .change = htb_change_class,
1563 .delete = htb_delete,
1564 .walk = htb_walk,
1565 .tcf_chain = htb_find_tcf,
1566 .bind_tcf = htb_bind_filter,
1567 .unbind_tcf = htb_unbind_filter,
1568 .dump = htb_dump_class,
1569 .dump_stats = htb_dump_class_stats,
1570};
1571
20fea08b 1572static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1da177e4
LT
1573 .cl_ops = &htb_class_ops,
1574 .id = "htb",
1575 .priv_size = sizeof(struct htb_sched),
1576 .enqueue = htb_enqueue,
1577 .dequeue = htb_dequeue,
77be155c 1578 .peek = qdisc_peek_dequeued,
1da177e4
LT
1579 .init = htb_init,
1580 .reset = htb_reset,
1581 .destroy = htb_destroy,
1da177e4
LT
1582 .dump = htb_dump,
1583 .owner = THIS_MODULE,
1584};
1585
1586static int __init htb_module_init(void)
1587{
87990467 1588 return register_qdisc(&htb_qdisc_ops);
1da177e4 1589}
87990467 1590static void __exit htb_module_exit(void)
1da177e4 1591{
87990467 1592 unregister_qdisc(&htb_qdisc_ops);
1da177e4 1593}
87990467 1594
1da177e4
LT
1595module_init(htb_module_init)
1596module_exit(htb_module_exit)
1597MODULE_LICENSE("GPL");