2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
87990467 2/*
1da177e4
LT
3 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
4 *
1da177e4
LT
5 * Authors: Martin Devera, <devik@cdi.cz>
6 *
7 * Credits (in time order) for older HTB versions:
8 * Stef Coene <stef.coene@docum.org>
9 * HTB support at LARTC mailing list
10297b99 10 * Ondrej Kraus, <krauso@barr.cz>
1da177e4
LT
11 * found missing INIT_QDISC(htb)
12 * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13 * helped a lot to locate nasty class stall bug
14 * Andi Kleen, Jamal Hadi, Bert Hubert
15 * code review and helpful comments on shaping
16 * Tomasz Wrona, <tw@eter.tym.pl>
17 * created test case so that I was able to fix nasty bug
18 * Wilfried Weissmann
19 * spotted bug in dequeue code and helped with fix
20 * Jiri Fojtasek
21 * fixed requeue routine
22 * and many others. thanks.
1da177e4 23 */
1da177e4 24#include <linux/module.h>
47083fc0 25#include <linux/moduleparam.h>
1da177e4
LT
26#include <linux/types.h>
27#include <linux/kernel.h>
1da177e4 28#include <linux/string.h>
1da177e4 29#include <linux/errno.h>
1da177e4
LT
30#include <linux/skbuff.h>
31#include <linux/list.h>
32#include <linux/compiler.h>
0ba48053 33#include <linux/rbtree.h>
1224736d 34#include <linux/workqueue.h>
5a0e3ad6 35#include <linux/slab.h>
dc5fc579 36#include <net/netlink.h>
292f1c7f 37#include <net/sch_generic.h>
1da177e4 38#include <net/pkt_sched.h>
cf1facda 39#include <net/pkt_cls.h>
1da177e4
LT
40
41/* HTB algorithm.
42 Author: devik@cdi.cz
43 ========================================================================
44 HTB is like TBF with multiple classes. It is also similar to CBQ because
10297b99 45 it allows assigning a priority to each class in the hierarchy.
1da177e4
LT
46 In fact it is another implementation of Floyd's formal sharing.
47
48 Levels:
10297b99 49 Each class is assigned a level. Leaves ALWAYS have level 0 and root
1da177e4
LT
 50 classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51 one less than their parent.
52*/
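/* A worked example of the level numbering (the device name and rates
 * below are hypothetical, and TC_HTB_MAXDEPTH is assumed to be 8 as in
 * pkt_sched.h):
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 30
 *	tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit
 *	tc class add dev eth0 parent 1:1 classid 1:10 htb rate 30mbit
 *
 * Here 1:1 is a root class, so once it gains a child it sits at level
 * TC_HTB_MAXDEPTH-1 == 7, while the leaf 1:10, like every leaf, stays
 * at level 0.
 */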
53
47083fc0 54static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
37f2ad2b 55#define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */
1da177e4
LT
56
57#if HTB_VER >> 16 != TC_HTB_PROTOVER
58#error "Mismatched sch_htb.c and pkt_sch.h"
59#endif
60
47083fc0
JDB
61/* Module parameter and sysfs export */
62module_param (htb_hysteresis, int, 0640);
63MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
64153ce0
ED
65static int htb_rate_est = 0; /* htb classes have a default rate estimator */
66module_param(htb_rate_est, int, 0640);
67MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
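/* Usage sketch (values are only illustrative): both knobs are plain
 * module parameters, so they can typically be set at load time, e.g.
 * "modprobe sch_htb htb_hysteresis=1 htb_rate_est=1", or, given the
 * 0640 permissions above, flipped at run time through
 * /sys/module/sch_htb/parameters/htb_hysteresis and .../htb_rate_est.
 */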
68
1da177e4
LT
 69/* used internally to keep the status of a single class */
70enum htb_cmode {
87990467
SH
71 HTB_CANT_SEND, /* class can't send and can't borrow */
72 HTB_MAY_BORROW, /* class can't send but may borrow */
73 HTB_CAN_SEND /* class can send */
1da177e4
LT
74};
75
c9364636
ED
76struct htb_prio {
77 union {
78 struct rb_root row;
79 struct rb_root feed;
80 };
81 struct rb_node *ptr;
82 /* When class changes from state 1->2 and disconnects from
 83 * parent's feed then we lose the ptr value and start from the
84 * first child again. Here we store classid of the
85 * last valid ptr (used when ptr is NULL).
86 */
87 u32 last_ptr_id;
88};
89
ca4ec90b
ED
90/* interior & leaf nodes; props specific to leaves are marked L:
91 * To reduce false sharing, place mostly read fields at beginning,
92 * and mostly written ones at the end.
93 */
87990467 94struct htb_class {
f4c1f3e0 95 struct Qdisc_class_common common;
ca4ec90b
ED
96 struct psched_ratecfg rate;
97 struct psched_ratecfg ceil;
98 s64 buffer, cbuffer;/* token bucket depth/rate */
99 s64 mbuffer; /* max wait time */
cbd37556 100 u32 prio; /* these two are used only by leaves... */
ca4ec90b
ED
101 int quantum; /* but stored for parent-to-leaf return */
102
25d8c0d5 103 struct tcf_proto __rcu *filter_list; /* class attached filters */
6529eaba 104 struct tcf_block *block;
ca4ec90b 105 int filter_cnt;
ca4ec90b
ED
106
107 int level; /* our level (see above) */
108 unsigned int children;
109 struct htb_class *parent; /* parent class */
110
1c0d32fd 111 struct net_rate_estimator __rcu *rate_est;
1da177e4 112
ca4ec90b
ED
113 /*
114 * Written often fields
115 */
116 struct gnet_stats_basic_packed bstats;
83271586 117 struct gnet_stats_basic_packed bstats_bias;
ca4ec90b 118 struct tc_htb_xstats xstats; /* our special stats */
87990467 119
ca4ec90b
ED
120 /* token bucket parameters */
121 s64 tokens, ctokens;/* current number of tokens */
122 s64 t_c; /* checkpoint time */
c19f7a34 123
87990467
SH
124 union {
125 struct htb_class_leaf {
c9364636
ED
126 int deficit[TC_HTB_MAXDEPTH];
127 struct Qdisc *q;
87990467
SH
128 } leaf;
129 struct htb_class_inner {
c9364636 130 struct htb_prio clprio[TC_HTB_NUMPRIO];
87990467 131 } inner;
11957be2 132 };
ca4ec90b 133 s64 pq_key;
87990467 134
ca4ec90b
ED
135 int prio_activity; /* for which prios are we active */
136 enum htb_cmode cmode; /* current mode of the class */
137 struct rb_node pq_node; /* node for event queue */
138 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
338ed9b4
ED
139
140 unsigned int drops ____cacheline_aligned_in_smp;
3c75f6ee 141 unsigned int overlimits;
1da177e4
LT
142};
143
c9364636
ED
144struct htb_level {
145 struct rb_root wait_pq;
146 struct htb_prio hprio[TC_HTB_NUMPRIO];
147};
148
87990467 149struct htb_sched {
f4c1f3e0 150 struct Qdisc_class_hash clhash;
c9364636
ED
151 int defcls; /* class where unclassified flows go to */
152 int rate2quantum; /* quant = rate / rate2quantum */
1da177e4 153
c9364636 154 /* filters for qdisc itself */
25d8c0d5 155 struct tcf_proto __rcu *filter_list;
6529eaba 156 struct tcf_block *block;
1da177e4 157
c9364636
ED
158#define HTB_WARN_TOOMANYEVENTS 0x1
159 unsigned int warned; /* only one warning */
160 int direct_qlen;
161 struct work_struct work;
1da177e4 162
c9364636 163 /* non shaped skbs; let them go directly thru */
48da34b7 164 struct qdisc_skb_head direct_queue;
b362487a
CW
165 u32 direct_pkts;
166 u32 overlimits;
1da177e4 167
c9364636 168 struct qdisc_watchdog watchdog;
1da177e4 169
c9364636 170 s64 now; /* cached dequeue time */
1da177e4 171
c9364636
ED
172 /* time of nearest event per level (row) */
173 s64 near_ev_cache[TC_HTB_MAXDEPTH];
87990467 174
c9364636 175 int row_mask[TC_HTB_MAXDEPTH];
e82181de 176
c9364636 177 struct htb_level hlevel[TC_HTB_MAXDEPTH];
d03b195b
MM
178
179 struct Qdisc **direct_qdiscs;
180 unsigned int num_direct_qdiscs;
181
182 bool offload;
1da177e4
LT
183};
184
1da177e4 185/* find class in global hash table using given handle */
87990467 186static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
1da177e4
LT
187{
188 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 189 struct Qdisc_class_common *clc;
0cef296d 190
f4c1f3e0
PM
191 clc = qdisc_class_find(&q->clhash, handle);
192 if (clc == NULL)
1da177e4 193 return NULL;
f4c1f3e0 194 return container_of(clc, struct htb_class, common);
1da177e4
LT
195}
196
143976ce
WC
197static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198{
199 return (unsigned long)htb_find(handle, sch);
200}
1da177e4
LT
201/**
202 * htb_classify - classify a packet into class
203 *
204 * It returns NULL if the packet should be dropped or -1 if the packet
205 * should be passed directly thru. In all other cases leaf class is returned.
 206 * We allow direct class selection by classid in skb->priority. Then we examine
207 * filters in qdisc and in inner nodes (if higher filter points to the inner
208 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
10297b99 209 * internal fifo (direct). These packets then go directly thru. If we still
25985edc 210 * have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful
1da177e4
LT
 211 * we finish and return the direct queue.
212 */
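/* A short worked example of the rules above (the classids are made
 * up): with "... htb default 30" on handle 1:, a packet whose
 * skb->priority carries the classid of an existing leaf, say 1:10,
 * is queued straight to that leaf; skb->priority == 1:0 (the qdisc
 * handle itself) selects the direct queue; otherwise the attached
 * filters run, and if none of them resolves to a leaf, the default
 * class 1:30 is tried before falling back to the direct queue.
 */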
cc7ec456 213#define HTB_DIRECT ((struct htb_class *)-1L)
1da177e4 214
87990467
SH
215static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216 int *qerr)
1da177e4
LT
217{
218 struct htb_sched *q = qdisc_priv(sch);
219 struct htb_class *cl;
220 struct tcf_result res;
221 struct tcf_proto *tcf;
222 int result;
223
 224 /* allow selecting the class by setting skb->priority to a valid classid;
cc7ec456
ED
225 * note that nfmark can be used too by attaching filter fw with no
226 * rules in it
227 */
1da177e4 228 if (skb->priority == sch->handle)
87990467 229 return HTB_DIRECT; /* X:0 (direct flow) selected */
cc7ec456 230 cl = htb_find(skb->priority, sch);
29824310
HM
231 if (cl) {
232 if (cl->level == 0)
233 return cl;
234 /* Start with inner filter chain if a non-leaf class is selected */
25d8c0d5 235 tcf = rcu_dereference_bh(cl->filter_list);
29824310 236 } else {
25d8c0d5 237 tcf = rcu_dereference_bh(q->filter_list);
29824310 238 }
1da177e4 239
c27f339a 240 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
87d83093 241 while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
1da177e4
LT
242#ifdef CONFIG_NET_CLS_ACT
243 switch (result) {
244 case TC_ACT_QUEUED:
87990467 245 case TC_ACT_STOLEN:
e25ea21f 246 case TC_ACT_TRAP:
378a2f09 247 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
964201de 248 fallthrough;
1da177e4
LT
249 case TC_ACT_SHOT:
250 return NULL;
251 }
1da177e4 252#endif
cc7ec456
ED
253 cl = (void *)res.class;
254 if (!cl) {
1da177e4 255 if (res.classid == sch->handle)
87990467 256 return HTB_DIRECT; /* X:0 (direct flow) */
cc7ec456
ED
257 cl = htb_find(res.classid, sch);
258 if (!cl)
87990467 259 break; /* filter selected invalid classid */
1da177e4
LT
260 }
261 if (!cl->level)
87990467 262 return cl; /* we hit leaf; return it */
1da177e4
LT
263
264 /* we have got inner class; apply inner filter chain */
25d8c0d5 265 tcf = rcu_dereference_bh(cl->filter_list);
1da177e4
LT
266 }
267 /* classification failed; try to use default class */
87990467 268 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
1da177e4 269 if (!cl || cl->level)
87990467 270 return HTB_DIRECT; /* bad default .. this is safe bet */
1da177e4
LT
271 return cl;
272}
273
1da177e4
LT
274/**
275 * htb_add_to_id_tree - adds class to the round robin list
a10541f5
YK
276 * @root: the root of the tree
277 * @cl: the class to add
278 * @prio: the give prio in class
1da177e4
LT
279 *
280 * Routine adds class to the list (actually tree) sorted by classid.
281 * Make sure that class is not already on such list for given prio.
282 */
87990467
SH
283static void htb_add_to_id_tree(struct rb_root *root,
284 struct htb_class *cl, int prio)
1da177e4
LT
285{
286 struct rb_node **p = &root->rb_node, *parent = NULL;
3bf72957 287
1da177e4 288 while (*p) {
87990467
SH
289 struct htb_class *c;
290 parent = *p;
1da177e4 291 c = rb_entry(parent, struct htb_class, node[prio]);
3bf72957 292
f4c1f3e0 293 if (cl->common.classid > c->common.classid)
1da177e4 294 p = &parent->rb_right;
87990467 295 else
1da177e4
LT
296 p = &parent->rb_left;
297 }
298 rb_link_node(&cl->node[prio], parent, p);
299 rb_insert_color(&cl->node[prio], root);
300}
301
302/**
303 * htb_add_to_wait_tree - adds class to the event queue with delay
4d7efa73
YK
304 * @q: the priority event queue
305 * @cl: the class to add
 306 * @delay: delay in nanoseconds
1da177e4
LT
307 *
308 * The class is added to priority event queue to indicate that class will
 309 * change its mode once q->now reaches cl->pq_key (nanoseconds). Make sure that class is not
310 * already in the queue.
311 */
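/* Rough numeric sketch (the values are invented): if q->now is
 * 1,000,000 ns and the class needs another 250,000 ns worth of tokens
 * before it may change mode, it is inserted here with
 * cl->pq_key = 1,250,000.  The per-level near_ev_cache[] updated below
 * then lets htb_dequeue() skip the whole event queue for this level
 * until that time has been reached.
 */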
87990467 312static void htb_add_to_wait_tree(struct htb_sched *q,
56b765b7 313 struct htb_class *cl, s64 delay)
1da177e4 314{
c9364636 315 struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
3bf72957 316
fb983d45
PM
317 cl->pq_key = q->now + delay;
318 if (cl->pq_key == q->now)
1da177e4
LT
319 cl->pq_key++;
320
321 /* update the nearest event cache */
fb983d45 322 if (q->near_ev_cache[cl->level] > cl->pq_key)
1da177e4 323 q->near_ev_cache[cl->level] = cl->pq_key;
87990467 324
1da177e4 325 while (*p) {
87990467
SH
326 struct htb_class *c;
327 parent = *p;
1da177e4 328 c = rb_entry(parent, struct htb_class, pq_node);
fb983d45 329 if (cl->pq_key >= c->pq_key)
1da177e4 330 p = &parent->rb_right;
87990467 331 else
1da177e4
LT
332 p = &parent->rb_left;
333 }
334 rb_link_node(&cl->pq_node, parent, p);
c9364636 335 rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
1da177e4
LT
336}
337
338/**
339 * htb_next_rb_node - finds next node in binary tree
274e5d0e 340 * @n: the current node in binary tree
1da177e4
LT
341 *
 342 * When we are past the last key we return NULL.
343 * Average complexity is 2 steps per call.
344 */
3696f625 345static inline void htb_next_rb_node(struct rb_node **n)
1da177e4
LT
346{
347 *n = rb_next(*n);
348}
349
350/**
351 * htb_add_class_to_row - add class to its row
996bccc3
YK
352 * @q: the priority event queue
353 * @cl: the class to add
354 * @mask: the given priorities in class in bitmap
1da177e4
LT
355 *
356 * The class is added to row at priorities marked in mask.
357 * It does nothing if mask == 0.
358 */
87990467
SH
359static inline void htb_add_class_to_row(struct htb_sched *q,
360 struct htb_class *cl, int mask)
1da177e4 361{
1da177e4
LT
362 q->row_mask[cl->level] |= mask;
363 while (mask) {
364 int prio = ffz(~mask);
365 mask &= ~(1 << prio);
c9364636 366 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
1da177e4
LT
367 }
368}
369
3696f625
SH
370/* If this triggers, it is a bug in this code, but it need not be fatal */
371static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
372{
81771b3b 373 if (RB_EMPTY_NODE(rb)) {
3696f625
SH
374 WARN_ON(1);
375 } else {
376 rb_erase(rb, root);
377 RB_CLEAR_NODE(rb);
378 }
379}
380
381
1da177e4
LT
382/**
383 * htb_remove_class_from_row - removes class from its row
5f8c6d05
YK
384 * @q: the priority event queue
385 * @cl: the class to add
386 * @mask: the given priorities in class in bitmap
1da177e4
LT
387 *
388 * The class is removed from row at priorities marked in mask.
389 * It does nothing if mask == 0.
390 */
87990467
SH
391static inline void htb_remove_class_from_row(struct htb_sched *q,
392 struct htb_class *cl, int mask)
1da177e4
LT
393{
394 int m = 0;
c9364636 395 struct htb_level *hlevel = &q->hlevel[cl->level];
3bf72957 396
1da177e4
LT
397 while (mask) {
398 int prio = ffz(~mask);
c9364636 399 struct htb_prio *hprio = &hlevel->hprio[prio];
3696f625 400
1da177e4 401 mask &= ~(1 << prio);
c9364636
ED
402 if (hprio->ptr == cl->node + prio)
403 htb_next_rb_node(&hprio->ptr);
3696f625 404
c9364636
ED
405 htb_safe_rb_erase(cl->node + prio, &hprio->row);
406 if (!hprio->row.rb_node)
1da177e4
LT
407 m |= 1 << prio;
408 }
1da177e4
LT
409 q->row_mask[cl->level] &= ~m;
410}
411
412/**
 413 * htb_activate_prios - creates active class's feed chain
876b5fc0
YK
414 * @q: the priority event queue
415 * @cl: the class to activate
1da177e4
LT
416 *
417 * The class is connected to ancestors and/or appropriate rows
10297b99 418 * for priorities it is participating in. cl->cmode must be new
1da177e4
LT
419 * (activated) mode. It does nothing if cl->prio_activity == 0.
420 */
87990467 421static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
1da177e4
LT
422{
423 struct htb_class *p = cl->parent;
87990467 424 long m, mask = cl->prio_activity;
1da177e4
LT
425
426 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
87990467
SH
427 m = mask;
428 while (m) {
1da177e4
LT
429 int prio = ffz(~m);
430 m &= ~(1 << prio);
87990467 431
11957be2 432 if (p->inner.clprio[prio].feed.rb_node)
1da177e4 433 /* parent already has its feed in use so that
cc7ec456
ED
434 * reset bit in mask as parent is already ok
435 */
1da177e4 436 mask &= ~(1 << prio);
87990467 437
11957be2 438 htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
1da177e4 439 }
1da177e4 440 p->prio_activity |= mask;
87990467
SH
441 cl = p;
442 p = cl->parent;
3bf72957 443
1da177e4
LT
444 }
445 if (cl->cmode == HTB_CAN_SEND && mask)
87990467 446 htb_add_class_to_row(q, cl, mask);
1da177e4
LT
447}
448
449/**
450 * htb_deactivate_prios - remove class from feed chain
4113be20
YK
451 * @q: the priority event queue
452 * @cl: the class to deactivate
1da177e4 453 *
10297b99 454 * cl->cmode must represent old mode (before deactivation). It does
1da177e4
LT
455 * nothing if cl->prio_activity == 0. Class is removed from all feed
456 * chains and rows.
457 */
458static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
459{
460 struct htb_class *p = cl->parent;
87990467 461 long m, mask = cl->prio_activity;
1da177e4
LT
462
463 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
87990467
SH
464 m = mask;
465 mask = 0;
1da177e4
LT
466 while (m) {
467 int prio = ffz(~m);
468 m &= ~(1 << prio);
87990467 469
11957be2 470 if (p->inner.clprio[prio].ptr == cl->node + prio) {
1da177e4 471 /* we are removing child which is pointed to from
cc7ec456
ED
472 * parent feed - forget the pointer but remember
473 * classid
474 */
11957be2
CW
475 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
476 p->inner.clprio[prio].ptr = NULL;
1da177e4 477 }
87990467 478
c9364636 479 htb_safe_rb_erase(cl->node + prio,
11957be2 480 &p->inner.clprio[prio].feed);
87990467 481
11957be2 482 if (!p->inner.clprio[prio].feed.rb_node)
1da177e4
LT
483 mask |= 1 << prio;
484 }
3bf72957 485
1da177e4 486 p->prio_activity &= ~mask;
87990467
SH
487 cl = p;
488 p = cl->parent;
3bf72957 489
1da177e4 490 }
87990467
SH
491 if (cl->cmode == HTB_CAN_SEND && mask)
492 htb_remove_class_from_row(q, cl, mask);
1da177e4
LT
493}
494
56b765b7 495static inline s64 htb_lowater(const struct htb_class *cl)
18a63e86 496{
47083fc0
JDB
497 if (htb_hysteresis)
498 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
499 else
500 return 0;
18a63e86 501}
56b765b7 502static inline s64 htb_hiwater(const struct htb_class *cl)
18a63e86 503{
47083fc0
JDB
504 if (htb_hysteresis)
505 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
506 else
507 return 0;
18a63e86 508}
47083fc0 509
18a63e86 510
1da177e4
LT
511/**
512 * htb_class_mode - computes and returns current class mode
513 *
514 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
515 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
10297b99 516 * from now to time when cl will change its state.
1da177e4 517 * Also it is worth noting that class mode doesn't change simply
10297b99 518 * at cl->{c,}tokens == 0 but rather there can be hysteresis over the
1da177e4
LT
519 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
520 * mode transitions per time unit. The speed gain is about 1/6.
521 */
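/* Worked example of the hysteresis band (numbers are made up): say
 * cl->buffer corresponds to 10k tokens.  Without hysteresis a class in
 * HTB_CAN_SEND drops out of that mode as soon as cl->tokens goes below
 * 0; with htb_hysteresis enabled, htb_hiwater() returns -cl->buffer for
 * such a class, so it keeps HTB_CAN_SEND until cl->tokens falls below
 * -10k, trading a little accuracy for fewer mode transitions.
 */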
87990467 522static inline enum htb_cmode
56b765b7 523htb_class_mode(struct htb_class *cl, s64 *diff)
1da177e4 524{
56b765b7 525 s64 toks;
1da177e4 526
87990467
SH
527 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
528 *diff = -toks;
529 return HTB_CANT_SEND;
530 }
18a63e86 531
87990467
SH
532 if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
533 return HTB_CAN_SEND;
1da177e4 534
87990467
SH
535 *diff = -toks;
536 return HTB_MAY_BORROW;
1da177e4
LT
537}
538
539/**
 540 * htb_change_class_mode - changes class's mode
541 *
 542 * This should be the only way to change a class's mode under normal
37f2ad2b 543 * circumstances. Routine will update feed lists linkage, change mode
1da177e4
LT
544 * and add class to the wait event queue if appropriate. New mode should
545 * be different from old one and cl->pq_key has to be valid if changing
546 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
547 */
87990467 548static void
56b765b7 549htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
87990467
SH
550{
551 enum htb_cmode new_mode = htb_class_mode(cl, diff);
1da177e4
LT
552
553 if (new_mode == cl->cmode)
87990467
SH
554 return;
555
b362487a 556 if (new_mode == HTB_CANT_SEND) {
3c75f6ee 557 cl->overlimits++;
b362487a
CW
558 q->overlimits++;
559 }
3c75f6ee 560
87990467
SH
561 if (cl->prio_activity) { /* not necessary: speed optimization */
562 if (cl->cmode != HTB_CANT_SEND)
563 htb_deactivate_prios(q, cl);
1da177e4 564 cl->cmode = new_mode;
87990467
SH
565 if (new_mode != HTB_CANT_SEND)
566 htb_activate_prios(q, cl);
567 } else
1da177e4
LT
568 cl->cmode = new_mode;
569}
570
571/**
10297b99 572 * htb_activate - inserts leaf cl into appropriate active feeds
1da177e4
LT
573 *
574 * Routine learns (new) priority of leaf and activates feed chain
575 * for the prio. It can be called on already active leaf safely.
576 * It also adds leaf into droplist.
577 */
87990467 578static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
1da177e4 579{
11957be2 580 WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
3bf72957 581
1da177e4 582 if (!cl->prio_activity) {
c19f7a34 583 cl->prio_activity = 1 << cl->prio;
87990467 584 htb_activate_prios(q, cl);
1da177e4
LT
585 }
586}
587
588/**
10297b99 589 * htb_deactivate - remove leaf cl from active feeds
1da177e4
LT
590 *
 591 * Make sure that leaf is active. In other words it can't be called
592 * with non-active leaf. It also removes class from the drop list.
593 */
87990467 594static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
1da177e4 595{
547b792c 596 WARN_ON(!cl->prio_activity);
3bf72957 597
87990467 598 htb_deactivate_prios(q, cl);
1da177e4 599 cl->prio_activity = 0;
1da177e4
LT
600}
601
520ac30f
ED
602static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
603 struct sk_buff **to_free)
1da177e4 604{
3f649ab7 605 int ret;
f6bab199 606 unsigned int len = qdisc_pkt_len(skb);
87990467
SH
607 struct htb_sched *q = qdisc_priv(sch);
608 struct htb_class *cl = htb_classify(skb, sch, &ret);
609
610 if (cl == HTB_DIRECT) {
611 /* enqueue to helper queue */
612 if (q->direct_queue.qlen < q->direct_qlen) {
aea890b8 613 __qdisc_enqueue_tail(skb, &q->direct_queue);
87990467
SH
614 q->direct_pkts++;
615 } else {
520ac30f 616 return qdisc_drop(skb, sch, to_free);
87990467 617 }
1da177e4 618#ifdef CONFIG_NET_CLS_ACT
87990467 619 } else if (!cl) {
c27f339a 620 if (ret & __NET_XMIT_BYPASS)
25331d6c 621 qdisc_qstats_drop(sch);
520ac30f 622 __qdisc_drop(skb, to_free);
87990467 623 return ret;
1da177e4 624#endif
11957be2 625 } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
520ac30f 626 to_free)) != NET_XMIT_SUCCESS) {
378a2f09 627 if (net_xmit_drop_count(ret)) {
25331d6c 628 qdisc_qstats_drop(sch);
338ed9b4 629 cl->drops++;
378a2f09 630 }
69747650 631 return ret;
87990467 632 } else {
87990467
SH
633 htb_activate(q, cl);
634 }
635
f6bab199 636 sch->qstats.backlog += len;
87990467 637 sch->q.qlen++;
87990467 638 return NET_XMIT_SUCCESS;
1da177e4
LT
639}
640
56b765b7 641static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
59e4220a 642{
56b765b7 643 s64 toks = diff + cl->tokens;
59e4220a
JP
644
645 if (toks > cl->buffer)
646 toks = cl->buffer;
292f1c7f 647 toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
59e4220a
JP
648 if (toks <= -cl->mbuffer)
649 toks = 1 - cl->mbuffer;
650
651 cl->tokens = toks;
652}
653
56b765b7 654static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
59e4220a 655{
56b765b7 656 s64 toks = diff + cl->ctokens;
59e4220a
JP
657
658 if (toks > cl->cbuffer)
659 toks = cl->cbuffer;
292f1c7f 660 toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
59e4220a
JP
661 if (toks <= -cl->mbuffer)
662 toks = 1 - cl->mbuffer;
663
664 cl->ctokens = toks;
665}
666
1da177e4
LT
667/**
668 * htb_charge_class - charges amount "bytes" to leaf and ancestors
669 *
670 * Routine assumes that packet "bytes" long was dequeued from leaf cl
671 * borrowing from "level". It accounts bytes to ceil leaky bucket for
672 * leaf and all ancestors and to rate bucket for ancestors at levels
673 * "level" and higher. It also handles possible change of mode resulting
674 * from the update. Note that mode can also increase here (MAY_BORROW to
 675 * CAN_SEND) because we can use a more precise clock than the event queue here.
676 * In such case we remove class from event queue first.
677 */
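/* Worked example (a hypothetical hierarchy): leaf L (level 0) borrows
 * through its parent P (level 1) from grandparent G (level 2), so the
 * dequeue path calls this with level == 2.  For that packet, L and P
 * sit below the borrowing level and are charged only in their ceil
 * buckets (their xstats.borrows counters grow), while G, at the level
 * we borrowed from, is charged in both its rate and ceil buckets.
 */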
87990467 678static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
c9726d68 679 int level, struct sk_buff *skb)
87990467 680{
0abf77e5 681 int bytes = qdisc_pkt_len(skb);
1da177e4 682 enum htb_cmode old_mode;
56b765b7 683 s64 diff;
1da177e4
LT
684
685 while (cl) {
56b765b7 686 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
1da177e4 687 if (cl->level >= level) {
87990467
SH
688 if (cl->level == level)
689 cl->xstats.lends++;
59e4220a 690 htb_accnt_tokens(cl, bytes, diff);
1da177e4
LT
691 } else {
692 cl->xstats.borrows++;
87990467 693 cl->tokens += diff; /* we moved t_c; update tokens */
1da177e4 694 }
59e4220a 695 htb_accnt_ctokens(cl, bytes, diff);
1da177e4 696 cl->t_c = q->now;
1da177e4 697
87990467
SH
698 old_mode = cl->cmode;
699 diff = 0;
700 htb_change_class_mode(q, cl, &diff);
1da177e4
LT
701 if (old_mode != cl->cmode) {
702 if (old_mode != HTB_CAN_SEND)
c9364636 703 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
1da177e4 704 if (cl->cmode != HTB_CAN_SEND)
87990467 705 htb_add_to_wait_tree(q, cl, diff);
1da177e4 706 }
1da177e4 707
bfe0d029
ED
708 /* update basic stats except for leaves which are already updated */
709 if (cl->level)
710 bstats_update(&cl->bstats, skb);
711
1da177e4
LT
712 cl = cl->parent;
713 }
714}
715
716/**
717 * htb_do_events - make mode changes to classes at the level
718 *
fb983d45 719 * Scans event queue for pending events and applies them. Returns time of
1224736d 720 * next pending event (0 for no event in pq, q->now for too many events).
fb983d45 721 * Note: Applied are events which have cl->pq_key <= q->now.
1da177e4 722 */
c9364636 723static s64 htb_do_events(struct htb_sched *q, const int level,
5343a7f8 724 unsigned long start)
1da177e4 725{
8f3ea33a 726 /* don't run for longer than 2 jiffies; 2 is used instead of
cc7ec456
ED
727 * 1 to simplify things when jiffy is going to be incremented
728 * too soon
729 */
a73be040 730 unsigned long stop_at = start + 2;
c9364636
ED
731 struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
732
8f3ea33a 733 while (time_before(jiffies, stop_at)) {
1da177e4 734 struct htb_class *cl;
56b765b7 735 s64 diff;
c9364636 736 struct rb_node *p = rb_first(wait_pq);
30bdbe39 737
87990467
SH
738 if (!p)
739 return 0;
1da177e4
LT
740
741 cl = rb_entry(p, struct htb_class, pq_node);
fb983d45
PM
742 if (cl->pq_key > q->now)
743 return cl->pq_key;
744
c9364636 745 htb_safe_rb_erase(p, wait_pq);
56b765b7 746 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
87990467 747 htb_change_class_mode(q, cl, &diff);
1da177e4 748 if (cl->cmode != HTB_CAN_SEND)
87990467 749 htb_add_to_wait_tree(q, cl, diff);
1da177e4 750 }
1224736d
JP
751
752 /* too much load - let's continue after a break for scheduling */
e82181de 753 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
c17988a9 754 pr_warn("htb: too many events!\n");
e82181de
JP
755 q->warned |= HTB_WARN_TOOMANYEVENTS;
756 }
1224736d
JP
757
758 return q->now;
1da177e4
LT
759}
760
 761/* Returns class->node+prio from id-tree where the class's id is >= id. NULL
cc7ec456
ED
 762 * if no such one exists.
763 */
87990467
SH
764static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
765 u32 id)
1da177e4
LT
766{
767 struct rb_node *r = NULL;
768 while (n) {
87990467
SH
769 struct htb_class *cl =
770 rb_entry(n, struct htb_class, node[prio]);
87990467 771
f4c1f3e0 772 if (id > cl->common.classid) {
1da177e4 773 n = n->rb_right;
1b5c0077 774 } else if (id < cl->common.classid) {
1da177e4
LT
775 r = n;
776 n = n->rb_left;
1b5c0077
JP
777 } else {
778 return n;
1da177e4
LT
779 }
780 }
781 return r;
782}
783
784/**
785 * htb_lookup_leaf - returns next leaf class in DRR order
786 *
 787 * Find the leaf where the current feed pointer points.
788 */
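/* Informal sketch of the walk below: starting from the row's rb tree
 * for this prio, the loop follows the remembered per-prio feed pointer
 * (rewinding to the leftmost node when it has run off the end) and
 * descends into each inner class's feed tree, pushing every step onto
 * stk[] so it can back out again, until a level 0 class is found.
 */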
c9364636 789static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
1da177e4
LT
790{
791 int i;
792 struct {
793 struct rb_node *root;
794 struct rb_node **pptr;
795 u32 *pid;
87990467
SH
796 } stk[TC_HTB_MAXDEPTH], *sp = stk;
797
c9364636
ED
798 BUG_ON(!hprio->row.rb_node);
799 sp->root = hprio->row.rb_node;
800 sp->pptr = &hprio->ptr;
801 sp->pid = &hprio->last_ptr_id;
1da177e4
LT
802
803 for (i = 0; i < 65535; i++) {
87990467 804 if (!*sp->pptr && *sp->pid) {
10297b99 805 /* ptr was invalidated but id is valid - try to recover
cc7ec456
ED
806 * the original or next ptr
807 */
87990467
SH
808 *sp->pptr =
809 htb_id_find_next_upper(prio, sp->root, *sp->pid);
1da177e4 810 }
87990467 811 *sp->pid = 0; /* ptr is valid now so remove this hint as it
cc7ec456
ED
812 * can become out of date quickly
813 */
87990467 814 if (!*sp->pptr) { /* we are at right end; rewind & go up */
1da177e4 815 *sp->pptr = sp->root;
87990467 816 while ((*sp->pptr)->rb_left)
1da177e4
LT
817 *sp->pptr = (*sp->pptr)->rb_left;
818 if (sp > stk) {
819 sp--;
512bb43e
JP
820 if (!*sp->pptr) {
821 WARN_ON(1);
87990467 822 return NULL;
512bb43e 823 }
87990467 824 htb_next_rb_node(sp->pptr);
1da177e4
LT
825 }
826 } else {
827 struct htb_class *cl;
c9364636
ED
828 struct htb_prio *clp;
829
87990467
SH
830 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
831 if (!cl->level)
1da177e4 832 return cl;
11957be2 833 clp = &cl->inner.clprio[prio];
c9364636
ED
834 (++sp)->root = clp->feed.rb_node;
835 sp->pptr = &clp->ptr;
836 sp->pid = &clp->last_ptr_id;
1da177e4
LT
837 }
838 }
547b792c 839 WARN_ON(1);
1da177e4
LT
840 return NULL;
841}
842
843/* dequeues packet at given priority and level; call only if
cc7ec456
ED
 844 * you are sure that there is an active class at prio/level
845 */
c9364636
ED
846static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
847 const int level)
1da177e4
LT
848{
849 struct sk_buff *skb = NULL;
87990467 850 struct htb_class *cl, *start;
c9364636
ED
851 struct htb_level *hlevel = &q->hlevel[level];
852 struct htb_prio *hprio = &hlevel->hprio[prio];
853
1da177e4 854 /* look initial class up in the row */
c9364636 855 start = cl = htb_lookup_leaf(hprio, prio);
87990467 856
1da177e4
LT
857 do {
858next:
512bb43e 859 if (unlikely(!cl))
87990467 860 return NULL;
1da177e4
LT
861
862 /* class can be empty - it is unlikely but can be true if leaf
cc7ec456
ED
863 * qdisc drops packets in enqueue routine or if someone used
864 * graft operation on the leaf since last dequeue;
865 * simply deactivate and skip such class
866 */
11957be2 867 if (unlikely(cl->leaf.q->q.qlen == 0)) {
1da177e4 868 struct htb_class *next;
87990467 869 htb_deactivate(q, cl);
1da177e4
LT
870
871 /* row/level might become empty */
872 if ((q->row_mask[level] & (1 << prio)) == 0)
87990467 873 return NULL;
1da177e4 874
c9364636 875 next = htb_lookup_leaf(hprio, prio);
87990467
SH
876
877 if (cl == start) /* fix start if we just deleted it */
1da177e4
LT
878 start = next;
879 cl = next;
880 goto next;
881 }
87990467 882
11957be2 883 skb = cl->leaf.q->dequeue(cl->leaf.q);
87990467 884 if (likely(skb != NULL))
1da177e4 885 break;
633fe66e 886
11957be2
CW
887 qdisc_warn_nonwc("htb", cl->leaf.q);
888 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
c9364636
ED
889 &q->hlevel[0].hprio[prio].ptr);
890 cl = htb_lookup_leaf(hprio, prio);
1da177e4
LT
891
892 } while (cl != start);
893
894 if (likely(skb != NULL)) {
196d97f6 895 bstats_update(&cl->bstats, skb);
11957be2
CW
896 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
897 if (cl->leaf.deficit[level] < 0) {
898 cl->leaf.deficit[level] += cl->quantum;
899 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
c9364636 900 &q->hlevel[0].hprio[prio].ptr);
1da177e4
LT
901 }
 902 /* this used to be after charge_class but this constellation
cc7ec456
ED
903 * gives us slightly better performance
904 */
11957be2 905 if (!cl->leaf.q->q.qlen)
87990467 906 htb_deactivate(q, cl);
c9726d68 907 htb_charge_class(q, cl, level, skb);
1da177e4
LT
908 }
909 return skb;
910}
911
1da177e4
LT
912static struct sk_buff *htb_dequeue(struct Qdisc *sch)
913{
9190b3b3 914 struct sk_buff *skb;
1da177e4
LT
915 struct htb_sched *q = qdisc_priv(sch);
916 int level;
5343a7f8 917 s64 next_event;
a73be040 918 unsigned long start_at;
1da177e4
LT
919
920 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
48da34b7 921 skb = __qdisc_dequeue_head(&q->direct_queue);
87990467 922 if (skb != NULL) {
9190b3b3
ED
923ok:
924 qdisc_bstats_update(sch, skb);
431e3a8e 925 qdisc_qstats_backlog_dec(sch, skb);
1da177e4
LT
926 sch->q.qlen--;
927 return skb;
928 }
929
87990467
SH
930 if (!sch->q.qlen)
931 goto fin;
d2de875c 932 q->now = ktime_get_ns();
a73be040 933 start_at = jiffies;
1da177e4 934
d2fe85da 935 next_event = q->now + 5LLU * NSEC_PER_SEC;
633fe66e 936
1da177e4
LT
937 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
938 /* common case optimization - skip event handler quickly */
939 int m;
c9364636 940 s64 event = q->near_ev_cache[level];
fb983d45 941
c9364636 942 if (q->now >= event) {
a73be040 943 event = htb_do_events(q, level, start_at);
2e4b3b0e 944 if (!event)
56b765b7 945 event = q->now + NSEC_PER_SEC;
2e4b3b0e 946 q->near_ev_cache[level] = event;
c9364636 947 }
fb983d45 948
c0851347 949 if (next_event > event)
fb983d45 950 next_event = event;
87990467 951
1da177e4
LT
952 m = ~q->row_mask[level];
953 while (m != (int)(-1)) {
87990467 954 int prio = ffz(m);
cc7ec456 955
1da177e4 956 m |= 1 << prio;
87990467 957 skb = htb_dequeue_tree(q, prio, level);
9190b3b3
ED
958 if (likely(skb != NULL))
959 goto ok;
1da177e4
LT
960 }
961 }
a9efad8b 962 if (likely(next_event > q->now))
45f50bed 963 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
a9efad8b 964 else
1224736d 965 schedule_work(&q->work);
1da177e4 966fin:
1da177e4
LT
967 return skb;
968}
969
1da177e4
LT
970/* reset all classes */
 971/* always called under BH & queue lock */
87990467 972static void htb_reset(struct Qdisc *sch)
1da177e4
LT
973{
974 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 975 struct htb_class *cl;
f4c1f3e0 976 unsigned int i;
0cef296d 977
f4c1f3e0 978 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 979 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1da177e4 980 if (cl->level)
11957be2 981 memset(&cl->inner, 0, sizeof(cl->inner));
1da177e4 982 else {
d03b195b 983 if (cl->leaf.q && !q->offload)
11957be2 984 qdisc_reset(cl->leaf.q);
1da177e4
LT
985 }
986 cl->prio_activity = 0;
987 cl->cmode = HTB_CAN_SEND;
1da177e4
LT
988 }
989 }
fb983d45 990 qdisc_watchdog_cancel(&q->watchdog);
a5a9f534 991 __qdisc_reset_queue(&q->direct_queue);
1da177e4 992 sch->q.qlen = 0;
431e3a8e 993 sch->qstats.backlog = 0;
c9364636 994 memset(q->hlevel, 0, sizeof(q->hlevel));
87990467 995 memset(q->row_mask, 0, sizeof(q->row_mask));
1da177e4
LT
996}
997
27a3421e
PM
998static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
999 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1000 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
1001 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1002 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
6906f4ed 1003 [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
df62cdf3
ED
1004 [TCA_HTB_RATE64] = { .type = NLA_U64 },
1005 [TCA_HTB_CEIL64] = { .type = NLA_U64 },
d03b195b 1006 [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
27a3421e
PM
1007};
1008
1224736d
JP
1009static void htb_work_func(struct work_struct *work)
1010{
1011 struct htb_sched *q = container_of(work, struct htb_sched, work);
1012 struct Qdisc *sch = q->watchdog.qdisc;
1013
0ee13627 1014 rcu_read_lock();
1224736d 1015 __netif_schedule(qdisc_root(sch));
0ee13627 1016 rcu_read_unlock();
1224736d
JP
1017}
1018
d03b195b
MM
1019static void htb_set_lockdep_class_child(struct Qdisc *q)
1020{
1021 static struct lock_class_key child_key;
1022
1023 lockdep_set_class(qdisc_lock(q), &child_key);
1024}
1025
1026static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1027{
1028 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1029}
1030
e63d7dfd
AA
1031static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1032 struct netlink_ext_ack *extack)
1da177e4 1033{
d03b195b
MM
1034 struct net_device *dev = qdisc_dev(sch);
1035 struct tc_htb_qopt_offload offload_opt;
1da177e4 1036 struct htb_sched *q = qdisc_priv(sch);
6906f4ed 1037 struct nlattr *tb[TCA_HTB_MAX + 1];
1da177e4 1038 struct tc_htb_glob *gopt;
d03b195b 1039 unsigned int ntx;
fb3a3e37 1040 bool offload;
cee63723 1041 int err;
cee63723 1042
88c2ace6
NA
1043 qdisc_watchdog_init(&q->watchdog, sch);
1044 INIT_WORK(&q->work, htb_work_func);
1045
cee63723
PM
1046 if (!opt)
1047 return -EINVAL;
1048
8d1a77f9 1049 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
6529eaba
JP
1050 if (err)
1051 return err;
1052
8cb08174
JB
1053 err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1054 NULL);
cee63723
PM
1055 if (err < 0)
1056 return err;
1057
6906f4ed 1058 if (!tb[TCA_HTB_INIT])
1da177e4 1059 return -EINVAL;
6906f4ed 1060
1e90474c 1061 gopt = nla_data(tb[TCA_HTB_INIT]);
6906f4ed 1062 if (gopt->version != HTB_VER >> 16)
1da177e4 1063 return -EINVAL;
1da177e4 1064
fb3a3e37 1065 offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
d03b195b 1066
fb3a3e37 1067 if (offload) {
d03b195b
MM
1068 if (sch->parent != TC_H_ROOT)
1069 return -EOPNOTSUPP;
1070
1071 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1072 return -EOPNOTSUPP;
1073
1074 q->num_direct_qdiscs = dev->real_num_tx_queues;
1075 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1076 sizeof(*q->direct_qdiscs),
1077 GFP_KERNEL);
1078 if (!q->direct_qdiscs)
1079 return -ENOMEM;
1080 }
1081
f4c1f3e0
PM
1082 err = qdisc_class_hash_init(&q->clhash);
1083 if (err < 0)
d03b195b 1084 goto err_free_direct_qdiscs;
1da177e4 1085
48da34b7 1086 qdisc_skb_head_init(&q->direct_queue);
1da177e4 1087
6906f4ed
ED
1088 if (tb[TCA_HTB_DIRECT_QLEN])
1089 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
348e3435 1090 else
6906f4ed 1091 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
348e3435 1092
1da177e4
LT
1093 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1094 q->rate2quantum = 1;
1095 q->defcls = gopt->defcls;
1096
fb3a3e37 1097 if (!offload)
d03b195b
MM
1098 return 0;
1099
1100 for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1101 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1102 struct Qdisc *qdisc;
1103
1104 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1105 TC_H_MAKE(sch->handle, 0), extack);
1106 if (!qdisc) {
1107 err = -ENOMEM;
1108 goto err_free_qdiscs;
1109 }
1110
1111 htb_set_lockdep_class_child(qdisc);
1112 q->direct_qdiscs[ntx] = qdisc;
1113 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1114 }
1115
1116 sch->flags |= TCQ_F_MQROOT;
1117
1118 offload_opt = (struct tc_htb_qopt_offload) {
1119 .command = TC_HTB_CREATE,
1120 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1121 .classid = TC_H_MIN(q->defcls),
1122 .extack = extack,
1123 };
1124 err = htb_offload(dev, &offload_opt);
1125 if (err)
1126 goto err_free_qdiscs;
1127
fb3a3e37
MM
1128 /* Defer this assignment, so that htb_destroy skips offload-related
1129 * parts (especially calling ndo_setup_tc) on errors.
1130 */
1131 q->offload = true;
1132
1da177e4 1133 return 0;
d03b195b
MM
1134
1135err_free_qdiscs:
d03b195b
MM
1136 for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1137 ntx++)
1138 qdisc_put(q->direct_qdiscs[ntx]);
1139
1140 qdisc_class_hash_destroy(&q->clhash);
1141 /* Prevent use-after-free and double-free when htb_destroy gets called.
1142 */
1143 q->clhash.hash = NULL;
1144 q->clhash.hashsize = 0;
1145
1146err_free_direct_qdiscs:
1147 kfree(q->direct_qdiscs);
1148 q->direct_qdiscs = NULL;
1149 return err;
1150}
1151
1152static void htb_attach_offload(struct Qdisc *sch)
1153{
1154 struct net_device *dev = qdisc_dev(sch);
1155 struct htb_sched *q = qdisc_priv(sch);
1156 unsigned int ntx;
1157
1158 for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1159 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1160
1161 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1162 qdisc_put(old);
1163 qdisc_hash_add(qdisc, false);
1164 }
1165 for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1166 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1167 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1168
1169 qdisc_put(old);
1170 }
1171
1172 kfree(q->direct_qdiscs);
1173 q->direct_qdiscs = NULL;
1174}
1175
1176static void htb_attach_software(struct Qdisc *sch)
1177{
1178 struct net_device *dev = qdisc_dev(sch);
1179 unsigned int ntx;
1180
1181 /* Resemble qdisc_graft behavior. */
1182 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1183 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1184 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1185
1186 qdisc_refcount_inc(sch);
1187
1188 qdisc_put(old);
1189 }
1190}
1191
1192static void htb_attach(struct Qdisc *sch)
1193{
1194 struct htb_sched *q = qdisc_priv(sch);
1195
1196 if (q->offload)
1197 htb_attach_offload(sch);
1198 else
1199 htb_attach_software(sch);
1da177e4
LT
1200}
1201
1202static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1203{
1204 struct htb_sched *q = qdisc_priv(sch);
4b3550ef 1205 struct nlattr *nest;
1da177e4 1206 struct tc_htb_glob gopt;
4b3550ef 1207
d03b195b
MM
1208 if (q->offload)
1209 sch->flags |= TCQ_F_OFFLOADED;
1210 else
1211 sch->flags &= ~TCQ_F_OFFLOADED;
1212
b362487a 1213 sch->qstats.overlimits = q->overlimits;
6f542efc
ED
 1214 /* It's safe to not acquire qdisc lock. As we hold RTNL,
1215 * no change can happen on the qdisc parameters.
1216 */
1da177e4 1217
4b3550ef 1218 gopt.direct_pkts = q->direct_pkts;
1da177e4
LT
1219 gopt.version = HTB_VER;
1220 gopt.rate2quantum = q->rate2quantum;
1221 gopt.defcls = q->defcls;
3bf72957 1222 gopt.debug = 0;
4b3550ef 1223
ae0be8de 1224 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
4b3550ef
PM
1225 if (nest == NULL)
1226 goto nla_put_failure;
6906f4ed
ED
1227 if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1228 nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1b34ec43 1229 goto nla_put_failure;
d03b195b
MM
1230 if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1231 goto nla_put_failure;
4b3550ef 1232
6f542efc 1233 return nla_nest_end(skb, nest);
4b3550ef 1234
1e90474c 1235nla_put_failure:
4b3550ef 1236 nla_nest_cancel(skb, nest);
1da177e4
LT
1237 return -1;
1238}
1239
1240static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
87990467 1241 struct sk_buff *skb, struct tcmsg *tcm)
1da177e4 1242{
87990467 1243 struct htb_class *cl = (struct htb_class *)arg;
83271586 1244 struct htb_sched *q = qdisc_priv(sch);
4b3550ef 1245 struct nlattr *nest;
1da177e4
LT
1246 struct tc_htb_opt opt;
1247
6f542efc
ED
 1248 /* It's safe to not acquire qdisc lock. As we hold RTNL,
1249 * no change can happen on the class parameters.
1250 */
f4c1f3e0
PM
1251 tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1252 tcm->tcm_handle = cl->common.classid;
11957be2
CW
1253 if (!cl->level && cl->leaf.q)
1254 tcm->tcm_info = cl->leaf.q->handle;
1da177e4 1255
ae0be8de 1256 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
4b3550ef
PM
1257 if (nest == NULL)
1258 goto nla_put_failure;
1da177e4 1259
87990467 1260 memset(&opt, 0, sizeof(opt));
1da177e4 1261
01cb71d2 1262 psched_ratecfg_getrate(&opt.rate, &cl->rate);
9c10f411 1263 opt.buffer = PSCHED_NS2TICKS(cl->buffer);
01cb71d2 1264 psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
9c10f411 1265 opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
c19f7a34
JP
1266 opt.quantum = cl->quantum;
1267 opt.prio = cl->prio;
87990467 1268 opt.level = cl->level;
1b34ec43
DM
1269 if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1270 goto nla_put_failure;
83271586
MM
1271 if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1272 goto nla_put_failure;
df62cdf3 1273 if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
2a51c1e8
ND
1274 nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1275 TCA_HTB_PAD))
df62cdf3
ED
1276 goto nla_put_failure;
1277 if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
2a51c1e8
ND
1278 nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1279 TCA_HTB_PAD))
df62cdf3 1280 goto nla_put_failure;
4b3550ef 1281
6f542efc 1282 return nla_nest_end(skb, nest);
4b3550ef 1283
1e90474c 1284nla_put_failure:
4b3550ef 1285 nla_nest_cancel(skb, nest);
1da177e4
LT
1286 return -1;
1287}
1288
83271586
MM
1289static void htb_offload_aggregate_stats(struct htb_sched *q,
1290 struct htb_class *cl)
1291{
1292 struct htb_class *c;
1293 unsigned int i;
1294
1295 memset(&cl->bstats, 0, sizeof(cl->bstats));
1296
1297 for (i = 0; i < q->clhash.hashsize; i++) {
1298 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1299 struct htb_class *p = c;
1300
1301 while (p && p->level < cl->level)
1302 p = p->parent;
1303
1304 if (p != cl)
1305 continue;
1306
1307 cl->bstats.bytes += c->bstats_bias.bytes;
1308 cl->bstats.packets += c->bstats_bias.packets;
1309 if (c->level == 0) {
1310 cl->bstats.bytes += c->leaf.q->bstats.bytes;
1311 cl->bstats.packets += c->leaf.q->bstats.packets;
1312 }
1313 }
1314 }
1315}
1316
1da177e4 1317static int
87990467 1318htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1da177e4 1319{
87990467 1320 struct htb_class *cl = (struct htb_class *)arg;
83271586 1321 struct htb_sched *q = qdisc_priv(sch);
338ed9b4
ED
1322 struct gnet_stats_queue qs = {
1323 .drops = cl->drops,
3c75f6ee 1324 .overlimits = cl->overlimits,
338ed9b4 1325 };
64015853 1326 __u32 qlen = 0;
1da177e4 1327
5dd431b6
PA
1328 if (!cl->level && cl->leaf.q)
1329 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1330
0564bf0a
KK
1331 cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1332 INT_MIN, INT_MAX);
1333 cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1334 INT_MIN, INT_MAX);
1da177e4 1335
83271586
MM
1336 if (q->offload) {
1337 if (!cl->level) {
1338 if (cl->leaf.q)
1339 cl->bstats = cl->leaf.q->bstats;
1340 else
1341 memset(&cl->bstats, 0, sizeof(cl->bstats));
1342 cl->bstats.bytes += cl->bstats_bias.bytes;
1343 cl->bstats.packets += cl->bstats_bias.packets;
1344 } else {
1345 htb_offload_aggregate_stats(q, cl);
1346 }
1347 }
1348
edb09eb1
ED
1349 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1350 d, NULL, &cl->bstats) < 0 ||
1c0d32fd 1351 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
338ed9b4 1352 gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1da177e4
LT
1353 return -1;
1354
1355 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1356}
1357
d03b195b
MM
1358static struct netdev_queue *
1359htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1360{
1361 struct net_device *dev = qdisc_dev(sch);
1362 struct tc_htb_qopt_offload offload_opt;
93bde210 1363 struct htb_sched *q = qdisc_priv(sch);
d03b195b
MM
1364 int err;
1365
93bde210
MM
1366 if (!q->offload)
1367 return sch->dev_queue;
1368
d03b195b
MM
1369 offload_opt = (struct tc_htb_qopt_offload) {
1370 .command = TC_HTB_LEAF_QUERY_QUEUE,
1371 .classid = TC_H_MIN(tcm->tcm_parent),
1372 };
1373 err = htb_offload(dev, &offload_opt);
1374 if (err || offload_opt.qid >= dev->num_tx_queues)
1375 return NULL;
1376 return netdev_get_tx_queue(dev, offload_opt.qid);
1377}
1378
1379static struct Qdisc *
1380htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1381{
1382 struct net_device *dev = dev_queue->dev;
1383 struct Qdisc *old_q;
1384
1385 if (dev->flags & IFF_UP)
1386 dev_deactivate(dev);
1387 old_q = dev_graft_qdisc(dev_queue, new_q);
1388 if (new_q)
1389 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1390 if (dev->flags & IFF_UP)
1391 dev_activate(dev);
1392
1393 return old_q;
1394}
1395
1396static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
1397{
1398 struct netdev_queue *queue_old, *queue_new;
1399 struct net_device *dev = qdisc_dev(sch);
1400 struct Qdisc *qdisc;
1401
1402 queue_old = netdev_get_tx_queue(dev, qid_old);
1403 queue_new = netdev_get_tx_queue(dev, qid_new);
1404
1405 if (dev->flags & IFF_UP)
1406 dev_deactivate(dev);
1407 qdisc = dev_graft_qdisc(queue_old, NULL);
1408 qdisc->dev_queue = queue_new;
1409 qdisc = dev_graft_qdisc(queue_new, qdisc);
1410 if (dev->flags & IFF_UP)
1411 dev_activate(dev);
1412
1413 WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1414}
1415
1da177e4 1416static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
653d6fd6 1417 struct Qdisc **old, struct netlink_ext_ack *extack)
1da177e4 1418{
d03b195b 1419 struct netdev_queue *dev_queue = sch->dev_queue;
87990467 1420 struct htb_class *cl = (struct htb_class *)arg;
d03b195b
MM
1421 struct htb_sched *q = qdisc_priv(sch);
1422 struct Qdisc *old_q;
1da177e4 1423
5b9a9ccf
PM
1424 if (cl->level)
1425 return -EINVAL;
d03b195b
MM
1426
1427 if (q->offload) {
1428 dev_queue = new->dev_queue;
1429 WARN_ON(dev_queue != cl->leaf.q->dev_queue);
1430 }
1431
1432 if (!new) {
1433 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1434 cl->common.classid, extack);
1435 if (!new)
1436 return -ENOBUFS;
1437 }
1438
1439 if (q->offload) {
1440 htb_set_lockdep_class_child(new);
1441 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1442 qdisc_refcount_inc(new);
1443 old_q = htb_graft_helper(dev_queue, new);
1444 }
5b9a9ccf 1445
11957be2 1446 *old = qdisc_replace(sch, new, &cl->leaf.q);
d03b195b
MM
1447
1448 if (q->offload) {
1449 WARN_ON(old_q != *old);
1450 qdisc_put(old_q);
1451 }
1452
5b9a9ccf 1453 return 0;
1da177e4
LT
1454}
1455
87990467 1456static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1da177e4 1457{
87990467 1458 struct htb_class *cl = (struct htb_class *)arg;
11957be2 1459 return !cl->level ? cl->leaf.q : NULL;
1da177e4
LT
1460}
1461
256d61b8
PM
1462static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1463{
1464 struct htb_class *cl = (struct htb_class *)arg;
1465
95946658 1466 htb_deactivate(qdisc_priv(sch), cl);
256d61b8
PM
1467}
1468
160d5e10
JP
1469static inline int htb_parent_last_child(struct htb_class *cl)
1470{
1471 if (!cl->parent)
1472 /* the root class */
1473 return 0;
42077599 1474 if (cl->parent->children > 1)
160d5e10
JP
1475 /* not the last child */
1476 return 0;
160d5e10
JP
1477 return 1;
1478}
1479
d03b195b 1480static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
3ba08b00 1481 struct Qdisc *new_q)
160d5e10 1482{
d03b195b 1483 struct htb_sched *q = qdisc_priv(sch);
160d5e10
JP
1484 struct htb_class *parent = cl->parent;
1485
11957be2 1486 WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
160d5e10 1487
3ba08b00 1488 if (parent->cmode != HTB_CAN_SEND)
c9364636
ED
1489 htb_safe_rb_erase(&parent->pq_node,
1490 &q->hlevel[parent->level].wait_pq);
3ba08b00 1491
160d5e10 1492 parent->level = 0;
11957be2
CW
1493 memset(&parent->inner, 0, sizeof(parent->inner));
1494 parent->leaf.q = new_q ? new_q : &noop_qdisc;
160d5e10
JP
1495 parent->tokens = parent->buffer;
1496 parent->ctokens = parent->cbuffer;
d2de875c 1497 parent->t_c = ktime_get_ns();
160d5e10
JP
1498 parent->cmode = HTB_CAN_SEND;
1499}
1500
d03b195b
MM
1501static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1502 struct netdev_queue *dev_queue,
1503 struct Qdisc *new_q)
1504{
1505 struct Qdisc *old_q;
1506
1507 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1508 qdisc_refcount_inc(new_q);
1509 old_q = htb_graft_helper(dev_queue, new_q);
1510 WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1511}
1512
1513static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1514 bool last_child, bool destroying,
1515 struct netlink_ext_ack *extack)
1516{
1517 struct tc_htb_qopt_offload offload_opt;
1518 struct Qdisc *q = cl->leaf.q;
1519 struct Qdisc *old = NULL;
1520 int err;
1521
1522 if (cl->level)
1523 return -EINVAL;
1524
1525 WARN_ON(!q);
1526 if (!destroying) {
1527 /* On destroy of HTB, two cases are possible:
1528 * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
1529 * 2. q is a noop qdisc (for nodes that were inner),
1530 * q->dev_queue is noop_netdev_queue.
1531 */
1532 old = htb_graft_helper(q->dev_queue, NULL);
1533 WARN_ON(!old);
1534 WARN_ON(old != q);
1535 }
1536
83271586
MM
1537 if (cl->parent) {
1538 cl->parent->bstats_bias.bytes += q->bstats.bytes;
1539 cl->parent->bstats_bias.packets += q->bstats.packets;
1540 }
1541
d03b195b
MM
1542 offload_opt = (struct tc_htb_qopt_offload) {
1543 .command = !last_child ? TC_HTB_LEAF_DEL :
1544 destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1545 TC_HTB_LEAF_DEL_LAST,
1546 .classid = cl->common.classid,
1547 .extack = extack,
1548 };
1549 err = htb_offload(qdisc_dev(sch), &offload_opt);
1550
1551 if (!err || destroying)
1552 qdisc_put(old);
1553 else
1554 htb_graft_helper(q->dev_queue, old);
1555
1556 if (last_child)
1557 return err;
1558
1559 if (!err && offload_opt.moved_qid != 0) {
1560 if (destroying)
1561 q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
1562 offload_opt.qid);
1563 else
1564 htb_offload_move_qdisc(sch, offload_opt.moved_qid,
1565 offload_opt.qid);
1566 }
1567
1568 return err;
1569}
1570
87990467 1571static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1da177e4 1572{
1da177e4 1573 if (!cl->level) {
11957be2 1574 WARN_ON(!cl->leaf.q);
86bd446b 1575 qdisc_put(cl->leaf.q);
1da177e4 1576 }
1c0d32fd 1577 gen_kill_estimator(&cl->rate_est);
6529eaba 1578 tcf_block_put(cl->block);
1da177e4
LT
1579 kfree(cl);
1580}
1581
87990467 1582static void htb_destroy(struct Qdisc *sch)
1da177e4 1583{
d03b195b
MM
1584 struct net_device *dev = qdisc_dev(sch);
1585 struct tc_htb_qopt_offload offload_opt;
1da177e4 1586 struct htb_sched *q = qdisc_priv(sch);
b67bfe0d 1587 struct hlist_node *next;
d03b195b 1588 bool nonempty, changed;
fbd8f137
PM
1589 struct htb_class *cl;
1590 unsigned int i;
1da177e4 1591
1224736d 1592 cancel_work_sync(&q->work);
fb983d45 1593 qdisc_watchdog_cancel(&q->watchdog);
1da177e4 1594 /* This line used to be after htb_destroy_class call below
cc7ec456
ED
1595 * and surprisingly it worked in 2.4. But it must precede it
 1596 * because filters need their target class alive to be able to call
1597 * unbind_filter on it (without Oops).
1598 */
6529eaba 1599 tcf_block_put(q->block);
87990467 1600
f4c1f3e0 1601 for (i = 0; i < q->clhash.hashsize; i++) {
89890422 1602 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
6529eaba 1603 tcf_block_put(cl->block);
89890422
KK
1604 cl->block = NULL;
1605 }
fbd8f137 1606 }
d03b195b
MM
1607
1608 do {
1609 nonempty = false;
1610 changed = false;
1611 for (i = 0; i < q->clhash.hashsize; i++) {
1612 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1613 common.hnode) {
1614 bool last_child;
1615
1616 if (!q->offload) {
1617 htb_destroy_class(sch, cl);
1618 continue;
1619 }
1620
1621 nonempty = true;
1622
1623 if (cl->level)
1624 continue;
1625
1626 changed = true;
1627
1628 last_child = htb_parent_last_child(cl);
1629 htb_destroy_class_offload(sch, cl, last_child,
1630 true, NULL);
1631 qdisc_class_hash_remove(&q->clhash,
1632 &cl->common);
1633 if (cl->parent)
1634 cl->parent->children--;
1635 if (last_child)
1636 htb_parent_to_leaf(sch, cl, NULL);
1637 htb_destroy_class(sch, cl);
1638 }
1639 }
1640 } while (changed);
1641 WARN_ON(nonempty);
1642
f4c1f3e0 1643 qdisc_class_hash_destroy(&q->clhash);
a5a9f534 1644 __qdisc_reset_queue(&q->direct_queue);
d03b195b
MM
1645
1646 if (!q->offload)
1647 return;
1648
1649 offload_opt = (struct tc_htb_qopt_offload) {
1650 .command = TC_HTB_DESTROY,
1651 };
1652 htb_offload(dev, &offload_opt);
1653
1654 if (!q->direct_qdiscs)
1655 return;
1656 for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1657 qdisc_put(q->direct_qdiscs[i]);
1658 kfree(q->direct_qdiscs);
1da177e4
LT
1659}
1660
4dd78a73
MM
1661static int htb_delete(struct Qdisc *sch, unsigned long arg,
1662 struct netlink_ext_ack *extack)
1da177e4
LT
1663{
1664 struct htb_sched *q = qdisc_priv(sch);
87990467 1665 struct htb_class *cl = (struct htb_class *)arg;
160d5e10
JP
1666 struct Qdisc *new_q = NULL;
1667 int last_child = 0;
d03b195b 1668 int err;
1da177e4 1669
a071d272
YY
1670 /* TODO: why don't allow to delete subtree ? references ? does
1671 * tc subsys guarantee us that in htb_destroy it holds no class
1672 * refs so that we can remove children safely there ?
1673 */
42077599 1674 if (cl->children || cl->filter_cnt)
1da177e4 1675 return -EBUSY;
87990467 1676
d03b195b
MM
1677 if (!cl->level && htb_parent_last_child(cl))
1678 last_child = 1;
1679
1680 if (q->offload) {
1681 err = htb_destroy_class_offload(sch, cl, last_child, false,
1682 extack);
1683 if (err)
1684 return err;
1685 }
1686
1687 if (last_child) {
1688 struct netdev_queue *dev_queue;
1689
1690 dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
1691 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
a38a9882
AA
1692 cl->parent->common.classid,
1693 NULL);
d03b195b 1694 if (q->offload) {
ae81feb7 1695 if (new_q) {
d03b195b 1696 htb_set_lockdep_class_child(new_q);
ae81feb7
YW
1697 htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1698 }
d03b195b 1699 }
160d5e10
JP
1700 }
1701
1da177e4 1702 sch_tree_lock(sch);
87990467 1703
e5f0e8f8
PA
1704 if (!cl->level)
1705 qdisc_purge_queue(cl->leaf.q);
814a175e 1706
f4c1f3e0
PM
 1707 /* delete from hash and active lists; the rest is done in destroy_class */
1708 qdisc_class_hash_remove(&q->clhash, &cl->common);
26b284de
JP
1709 if (cl->parent)
1710 cl->parent->children--;
c38c83cb 1711
1da177e4 1712 if (cl->prio_activity)
87990467 1713 htb_deactivate(q, cl);
1da177e4 1714
fbd8f137 1715 if (cl->cmode != HTB_CAN_SEND)
c9364636
ED
1716 htb_safe_rb_erase(&cl->pq_node,
1717 &q->hlevel[cl->level].wait_pq);
fbd8f137 1718
160d5e10 1719 if (last_child)
d03b195b 1720 htb_parent_to_leaf(sch, cl, new_q);
160d5e10 1721
1da177e4 1722 sch_tree_unlock(sch);
1da177e4 1723
143976ce
WC
1724 htb_destroy_class(sch, cl);
1725 return 0;
1da177e4
LT
1726}
1727
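/* htb_change_class - create a new class or modify an existing one. For a
 * new class the default leaf qdisc (and, when offloaded, the backing
 * device queue) is set up before the tree lock is taken; for an existing
 * class the new rate/ceil are also pushed to the offload device.
 */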
87990467 1728static int htb_change_class(struct Qdisc *sch, u32 classid,
1e90474c 1729 u32 parentid, struct nlattr **tca,
793d81d6 1730 unsigned long *arg, struct netlink_ext_ack *extack)
1da177e4
LT
1731{
1732 int err = -EINVAL;
1733 struct htb_sched *q = qdisc_priv(sch);
87990467 1734 struct htb_class *cl = (struct htb_class *)*arg, *parent;
d03b195b 1735 struct tc_htb_qopt_offload offload_opt;
1e90474c 1736 struct nlattr *opt = tca[TCA_OPTIONS];
6906f4ed 1737 struct nlattr *tb[TCA_HTB_MAX + 1];
4ce70b4a 1738 struct Qdisc *parent_qdisc = NULL;
d03b195b 1739 struct netdev_queue *dev_queue;
1da177e4 1740 struct tc_htb_opt *hopt;
df62cdf3 1741 u64 rate64, ceil64;
da01ec4e 1742 int warn = 0;
1da177e4
LT
1743
1744 /* extract all subattrs from opt attr */
cee63723
PM
1745 if (!opt)
1746 goto failure;
1747
8cb08174
JB
1748 err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1749 NULL);
cee63723
PM
1750 if (err < 0)
1751 goto failure;
1752
1753 err = -EINVAL;
27a3421e 1754 if (tb[TCA_HTB_PARMS] == NULL)
1da177e4 1755 goto failure;
1da177e4 1756
87990467
SH
1757 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1758
1e90474c 1759 hopt = nla_data(tb[TCA_HTB_PARMS]);
196d97f6 1760 if (!hopt->rate.rate || !hopt->ceil.rate)
87990467 1761 goto failure;
1da177e4 1762
8a8e3d84 1763 /* Keep backward compatibility with rate_table based iproute2 tc */
6b1dd856 1764 if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
e9bc3fa2
AA
1765 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1766 NULL));
6b1dd856
YY
1767
1768 if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
e9bc3fa2
AA
1769 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1770 NULL));
8a8e3d84 1771
d03b195b
MM
1772 rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1773 ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1774
87990467 1775 if (!cl) { /* new class */
d03b195b
MM
1776 struct net_device *dev = qdisc_dev(sch);
1777 struct Qdisc *new_q, *old_q;
3696f625 1778 int prio;
ee39e10c 1779 struct {
1e90474c 1780 struct nlattr nla;
ee39e10c
PM
1781 struct gnet_estimator opt;
1782 } est = {
1e90474c
PM
1783 .nla = {
1784 .nla_len = nla_attr_size(sizeof(est.opt)),
1785 .nla_type = TCA_RATE,
ee39e10c
PM
1786 },
1787 .opt = {
1788 /* 4s interval, 16s averaging constant */
1789 .interval = 2,
1790 .ewma_log = 2,
1791 },
1792 };
3696f625 1793
1da177e4 1794 /* check for valid classid */
f64f9e71
JP
1795 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1796 htb_find(classid, sch))
1da177e4
LT
1797 goto failure;
1798
1799 /* check maximal depth */
1800 if (parent && parent->parent && parent->parent->level < 2) {
cc7ec456 1801 pr_err("htb: tree is too deep\n");
1da177e4
LT
1802 goto failure;
1803 }
1804 err = -ENOBUFS;
cc7ec456
ED
1805 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1806 if (!cl)
1da177e4 1807 goto failure;
87990467 1808
8d1a77f9 1809 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
6529eaba
JP
1810 if (err) {
1811 kfree(cl);
1812 goto failure;
1813 }
64153ce0 1814 if (htb_rate_est || tca[TCA_RATE]) {
22e0f8b9
JF
1815 err = gen_new_estimator(&cl->bstats, NULL,
1816 &cl->rate_est,
edb09eb1
ED
1817 NULL,
1818 qdisc_root_sleeping_running(sch),
64153ce0 1819 tca[TCA_RATE] ? : &est.nla);
d03b195b
MM
1820 if (err)
1821 goto err_block_put;
71bcb09a
SH
1822 }
1823
42077599 1824 cl->children = 0;
3696f625
SH
1825 RB_CLEAR_NODE(&cl->pq_node);
1826
1827 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1828 RB_CLEAR_NODE(&cl->node[prio]);
1da177e4 1829
d03b195b
MM
1830 cl->common.classid = classid;
1831
 1832 /* Make sure nothing interrupts us in between two
1833 * ndo_setup_tc calls.
1834 */
1835 ASSERT_RTNL();
1836
1da177e4 1837 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
cc7ec456
ED
 1838 * so it can't be used inside of sch_tree_lock
1839 * -- thanks to Karlis Peisenieks
1840 */
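		/* Pick the TX queue that will back the new leaf: without
		 * offload it is the qdisc's own queue; with offload either a
		 * new queue is allocated from the device, or, for the first
		 * child of a current leaf, the parent's queue is reused and
		 * the parent is converted to an inner node in hardware.
		 */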
d03b195b
MM
1841 if (!q->offload) {
1842 dev_queue = sch->dev_queue;
1843 } else if (!(parent && !parent->level)) {
1844 /* Assign a dev_queue to this classid. */
1845 offload_opt = (struct tc_htb_qopt_offload) {
1846 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1847 .classid = cl->common.classid,
1848 .parent_classid = parent ?
1849 TC_H_MIN(parent->common.classid) :
1850 TC_HTB_CLASSID_ROOT,
1851 .rate = max_t(u64, hopt->rate.rate, rate64),
1852 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1853 .extack = extack,
1854 };
1855 err = htb_offload(dev, &offload_opt);
1856 if (err) {
1857 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1858 err);
1859 goto err_kill_estimator;
1860 }
1861 dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1862 } else { /* First child. */
1863 dev_queue = parent->leaf.q->dev_queue;
1864 old_q = htb_graft_helper(dev_queue, NULL);
1865 WARN_ON(old_q != parent->leaf.q);
1866 offload_opt = (struct tc_htb_qopt_offload) {
1867 .command = TC_HTB_LEAF_TO_INNER,
1868 .classid = cl->common.classid,
1869 .parent_classid =
1870 TC_H_MIN(parent->common.classid),
1871 .rate = max_t(u64, hopt->rate.rate, rate64),
1872 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1873 .extack = extack,
1874 };
1875 err = htb_offload(dev, &offload_opt);
1876 if (err) {
1877 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1878 err);
1879 htb_graft_helper(dev_queue, old_q);
1880 goto err_kill_estimator;
1881 }
83271586
MM
1882 parent->bstats_bias.bytes += old_q->bstats.bytes;
1883 parent->bstats_bias.packets += old_q->bstats.packets;
d03b195b
MM
1884 qdisc_put(old_q);
1885 }
1886 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
a38a9882 1887 classid, NULL);
d03b195b
MM
1888 if (q->offload) {
1889 if (new_q) {
1890 htb_set_lockdep_class_child(new_q);
1891 /* One ref for cl->leaf.q, the other for
1892 * dev_queue->qdisc.
1893 */
1894 qdisc_refcount_inc(new_q);
1895 }
1896 old_q = htb_graft_helper(dev_queue, new_q);
1897 /* No qdisc_put needed. */
1898 WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1899 }
1da177e4
LT
1900 sch_tree_lock(sch);
1901 if (parent && !parent->level) {
1902 /* turn parent into inner node */
e5f0e8f8 1903 qdisc_purge_queue(parent->leaf.q);
4ce70b4a 1904 parent_qdisc = parent->leaf.q;
87990467
SH
1905 if (parent->prio_activity)
1906 htb_deactivate(q, parent);
1da177e4
LT
1907
1908 /* remove from evt list because of level change */
1909 if (parent->cmode != HTB_CAN_SEND) {
c9364636 1910 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1da177e4
LT
1911 parent->cmode = HTB_CAN_SEND;
1912 }
1913 parent->level = (parent->parent ? parent->parent->level
87990467 1914 : TC_HTB_MAXDEPTH) - 1;
11957be2 1915 memset(&parent->inner, 0, sizeof(parent->inner));
1da177e4 1916 }
d03b195b 1917
1da177e4 1918 /* the leaf (this class) needs an elementary qdisc */
11957be2 1919 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1da177e4 1920
87990467 1921 cl->parent = parent;
1da177e4
LT
1922
1923 /* set class to be in HTB_CAN_SEND state */
b9a7afde
JP
1924 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1925 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
5343a7f8 1926 cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */
d2de875c 1927 cl->t_c = ktime_get_ns();
1da177e4
LT
1928 cl->cmode = HTB_CAN_SEND;
1929
1930 /* attach to the hash list and parent's family */
f4c1f3e0 1931 qdisc_class_hash_insert(&q->clhash, &cl->common);
42077599
PM
1932 if (parent)
1933 parent->children++;
11957be2
CW
1934 if (cl->leaf.q != &noop_qdisc)
1935 qdisc_hash_add(cl->leaf.q, true);
ee39e10c 1936 } else {
71bcb09a 1937 if (tca[TCA_RATE]) {
22e0f8b9
JF
1938 err = gen_replace_estimator(&cl->bstats, NULL,
1939 &cl->rate_est,
edb09eb1
ED
1940 NULL,
1941 qdisc_root_sleeping_running(sch),
71bcb09a
SH
1942 tca[TCA_RATE]);
1943 if (err)
1944 return err;
1945 }
1da177e4 1946
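		/* Existing class: when offloaded, push the new rate/ceil to
		 * the device before updating the software parameters below.
		 */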
d03b195b
MM
1947 if (q->offload) {
1948 struct net_device *dev = qdisc_dev(sch);
1949
1950 offload_opt = (struct tc_htb_qopt_offload) {
1951 .command = TC_HTB_NODE_MODIFY,
1952 .classid = cl->common.classid,
1953 .rate = max_t(u64, hopt->rate.rate, rate64),
1954 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1955 .extack = extack,
1956 };
1957 err = htb_offload(dev, &offload_opt);
1958 if (err)
1959 /* Estimator was replaced, and rollback may fail
1960 * as well, so we don't try to recover it, and
 1961 * the estimator won't work properly with the
1962 * offload anyway, because bstats are updated
1963 * only when the stats are queried.
1964 */
1965 return err;
1966 }
1598f7cb 1967
d03b195b
MM
1968 sch_tree_lock(sch);
1969 }
1598f7cb
YY
1970
1971 psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1972 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1973
1da177e4 1974 /* There used to be a nasty bug here: we have to check that the
11957be2 1975 * node is really a leaf before changing cl->leaf!
cc7ec456 1976 */
1da177e4 1977 if (!cl->level) {
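		/* Default quantum is rate_bytes_ps / r2q, clamped to the
		 * [1000, 200000] byte range (with a warning) unless an
		 * explicit quantum was supplied.
		 */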
1598f7cb
YY
1978 u64 quantum = cl->rate.rate_bytes_ps;
1979
1980 do_div(quantum, q->rate2quantum);
1981 cl->quantum = min_t(u64, quantum, INT_MAX);
1982
c19f7a34 1983 if (!hopt->quantum && cl->quantum < 1000) {
da01ec4e 1984 warn = -1;
c19f7a34 1985 cl->quantum = 1000;
1da177e4 1986 }
c19f7a34 1987 if (!hopt->quantum && cl->quantum > 200000) {
da01ec4e 1988 warn = 1;
c19f7a34 1989 cl->quantum = 200000;
1da177e4
LT
1990 }
1991 if (hopt->quantum)
c19f7a34
JP
1992 cl->quantum = hopt->quantum;
1993 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1994 cl->prio = TC_HTB_NUMPRIO - 1;
1da177e4
LT
1995 }
1996
324f5aa5 1997 cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
f3ad857e 1998 cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
56b765b7 1999
1da177e4 2000 sch_tree_unlock(sch);
4ce70b4a 2001 qdisc_put(parent_qdisc);
1da177e4 2002
da01ec4e
LR
2003 if (warn)
2004 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2005 cl->common.classid, (warn == -1 ? "small" : "big"));
2006
f4c1f3e0
PM
2007 qdisc_class_hash_grow(sch, &q->clhash);
2008
1da177e4
LT
2009 *arg = (unsigned long)cl;
2010 return 0;
2011
d03b195b
MM
2012err_kill_estimator:
2013 gen_kill_estimator(&cl->rate_est);
2014err_block_put:
2015 tcf_block_put(cl->block);
2016 kfree(cl);
1da177e4 2017failure:
1da177e4
LT
2018 return err;
2019}
2020
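/* htb_tcf_block - return the filter block of the given class, or the
 * qdisc-level block when no class is specified.
 */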
cbaacc4e
AA
2021static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2022 struct netlink_ext_ack *extack)
1da177e4
LT
2023{
2024 struct htb_sched *q = qdisc_priv(sch);
2025 struct htb_class *cl = (struct htb_class *)arg;
3bf72957 2026
6529eaba 2027 return cl ? cl->block : q->block;
1da177e4
LT
2028}
2029
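/* htb_bind_filter - a filter chose this class as its target; bump
 * filter_cnt so the class cannot be deleted while it is referenced.
 */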
2030static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
87990467 2031 u32 classid)
1da177e4 2032{
87990467 2033 struct htb_class *cl = htb_find(classid, sch);
3bf72957 2034
1da177e4 2035 /*if (cl && !cl->level) return 0;
cc7ec456
ED
2036 * The line above used to be there to prevent attaching filters to
 2037 * leaves. But at least the tc_index filter uses this just to get the
 2038 * class for other reasons, so we have to allow it.
 2039 * ----
 2040 * 19.6.2002 As Werner explained it is ok - bind filter is just
 2041 * another way to "lock" the class - unlike "get", this lock can
 2042 * be broken by the class during destroy, IIUC.
1da177e4 2043 */
87990467
SH
2044 if (cl)
2045 cl->filter_cnt++;
1da177e4
LT
2046 return (unsigned long)cl;
2047}
2048
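/* htb_unbind_filter - drop the reference taken in htb_bind_filter() */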
2049static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2050{
1da177e4 2051 struct htb_class *cl = (struct htb_class *)arg;
3bf72957 2052
87990467
SH
2053 if (cl)
2054 cl->filter_cnt--;
1da177e4
LT
2055}
2056
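/* htb_walk - iterate over all classes for dump/walk requests, honouring
 * the walker's skip/count/stop bookkeeping.
 */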
2057static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2058{
2059 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 2060 struct htb_class *cl;
f4c1f3e0 2061 unsigned int i;
1da177e4
LT
2062
2063 if (arg->stop)
2064 return;
2065
f4c1f3e0 2066 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 2067 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1da177e4
LT
2068 if (arg->count < arg->skip) {
2069 arg->count++;
2070 continue;
2071 }
2072 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2073 arg->stop = 1;
2074 return;
2075 }
2076 arg->count++;
2077 }
2078 }
2079}
2080
20fea08b 2081static const struct Qdisc_class_ops htb_class_ops = {
d03b195b 2082 .select_queue = htb_select_queue,
1da177e4
LT
2083 .graft = htb_graft,
2084 .leaf = htb_leaf,
256d61b8 2085 .qlen_notify = htb_qlen_notify,
143976ce 2086 .find = htb_search,
1da177e4
LT
2087 .change = htb_change_class,
2088 .delete = htb_delete,
2089 .walk = htb_walk,
6529eaba 2090 .tcf_block = htb_tcf_block,
1da177e4
LT
2091 .bind_tcf = htb_bind_filter,
2092 .unbind_tcf = htb_unbind_filter,
2093 .dump = htb_dump_class,
2094 .dump_stats = htb_dump_class_stats,
2095};
2096
20fea08b 2097static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1da177e4
LT
2098 .cl_ops = &htb_class_ops,
2099 .id = "htb",
2100 .priv_size = sizeof(struct htb_sched),
2101 .enqueue = htb_enqueue,
2102 .dequeue = htb_dequeue,
77be155c 2103 .peek = qdisc_peek_dequeued,
1da177e4 2104 .init = htb_init,
d03b195b 2105 .attach = htb_attach,
1da177e4
LT
2106 .reset = htb_reset,
2107 .destroy = htb_destroy,
1da177e4
LT
2108 .dump = htb_dump,
2109 .owner = THIS_MODULE,
2110};
2111
2112static int __init htb_module_init(void)
2113{
87990467 2114 return register_qdisc(&htb_qdisc_ops);
1da177e4 2115}
87990467 2116static void __exit htb_module_exit(void)
1da177e4 2117{
87990467 2118 unregister_qdisc(&htb_qdisc_ops);
1da177e4 2119}
87990467 2120
1da177e4
LT
2121module_init(htb_module_init)
2122module_exit(htb_module_exit)
2123MODULE_LICENSE("GPL");