]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/sched/sch_api.c
net/qdisc: IFF_NO_QUEUE drivers should use consistent TX queue len
[mirror_ubuntu-artful-kernel.git] / net / sched / sch_api.c
CommitLineData
1da177e4
LT
1/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
1da177e4 21#include <linux/string.h>
1da177e4 22#include <linux/errno.h>
1da177e4 23#include <linux/skbuff.h>
1da177e4
LT
24#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
4179477f 29#include <linux/hrtimer.h>
25bfcd5a 30#include <linux/lockdep.h>
5a0e3ad6 31#include <linux/slab.h>
59cc1f61 32#include <linux/hashtable.h>
1da177e4 33
457c4cbc 34#include <net/net_namespace.h>
b854272b 35#include <net/sock.h>
dc5fc579 36#include <net/netlink.h>
1da177e4
LT
37#include <net/pkt_sched.h>
38
7316ae88
TG
39static int qdisc_notify(struct net *net, struct sk_buff *oskb,
40 struct nlmsghdr *n, u32 clid,
1da177e4 41 struct Qdisc *old, struct Qdisc *new);
7316ae88
TG
42static int tclass_notify(struct net *net, struct sk_buff *oskb,
43 struct nlmsghdr *n, struct Qdisc *q,
44 unsigned long cl, int event);
1da177e4
LT
45
46/*
47
48 Short review.
49 -------------
50
51 This file consists of two interrelated parts:
52
53 1. queueing disciplines manager frontend.
54 2. traffic classes manager frontend.
55
56 Generally, queueing discipline ("qdisc") is a black box,
57 which is able to enqueue packets and to dequeue them (when
58 device is ready to send something) in order and at times
59 determined by algorithm hidden in it.
60
61 qdisc's are divided to two categories:
62 - "queues", which have no internal structure visible from outside.
63 - "schedulers", which split all the packets to "traffic classes",
64 using "packet classifiers" (look at cls_api.c)
65
66 In turn, classes may have child qdiscs (as rule, queues)
67 attached to them etc. etc. etc.
68
69 The goal of the routines in this file is to translate
70 information supplied by user in the form of handles
71 to more intelligible for kernel form, to make some sanity
72 checks and part of work, which is common to all qdiscs
73 and to provide rtnetlink notifications.
74
75 All real intelligent work is done inside qdisc modules.
76
77
78
79 Every discipline has two major routines: enqueue and dequeue.
80
81 ---dequeue
82
83 dequeue usually returns a skb to send. It is allowed to return NULL,
84 but it does not mean that queue is empty, it just means that
85 discipline does not want to send anything this time.
86 Queue is really empty if q->q.qlen == 0.
87 For complicated disciplines with multiple queues q->q is not
88 real packet queue, but however q->q.qlen must be valid.
89
90 ---enqueue
91
92 enqueue returns 0, if packet was enqueued successfully.
93 If packet (this one or another one) was dropped, it returns
94 a non-zero error code.
95 NET_XMIT_DROP - this packet dropped
96 Expected action: do not backoff, but wait until queue will clear.
97 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
98 Expected action: backoff or ignore
1da177e4
LT
99
100 Auxiliary routines:
101
99c0db26
JP
102 ---peek
103
104 like dequeue but without removing a packet from the queue
105
1da177e4
LT
106 ---reset
107
108 returns qdisc to initial state: purge all buffers, clear all
109 timers, counters (except for statistics) etc.
110
111 ---init
112
113 initializes newly created qdisc.
114
115 ---destroy
116
117 destroys resources allocated by init and during lifetime of qdisc.
118
119 ---change
120
121 changes qdisc parameters.
122 */
123
124/* Protects list of registered TC modules. It is pure SMP lock. */
125static DEFINE_RWLOCK(qdisc_mod_lock);
126
127
128/************************************************
129 * Queueing disciplines manipulation. *
130 ************************************************/
131
132
133/* The list of all installed queueing disciplines. */
134
135static struct Qdisc_ops *qdisc_base;
136
21eb2189 137/* Register/unregister queueing discipline */
1da177e4
LT
138
139int register_qdisc(struct Qdisc_ops *qops)
140{
141 struct Qdisc_ops *q, **qp;
142 int rc = -EEXIST;
143
144 write_lock(&qdisc_mod_lock);
145 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
146 if (!strcmp(qops->id, q->id))
147 goto out;
148
149 if (qops->enqueue == NULL)
150 qops->enqueue = noop_qdisc_ops.enqueue;
99c0db26 151 if (qops->peek == NULL) {
68fd26b5 152 if (qops->dequeue == NULL)
99c0db26 153 qops->peek = noop_qdisc_ops.peek;
68fd26b5
JP
154 else
155 goto out_einval;
99c0db26 156 }
1da177e4
LT
157 if (qops->dequeue == NULL)
158 qops->dequeue = noop_qdisc_ops.dequeue;
159
68fd26b5
JP
160 if (qops->cl_ops) {
161 const struct Qdisc_class_ops *cops = qops->cl_ops;
162
3e9e5a59 163 if (!(cops->get && cops->put && cops->walk && cops->leaf))
68fd26b5
JP
164 goto out_einval;
165
166 if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
167 goto out_einval;
168 }
169
1da177e4
LT
170 qops->next = NULL;
171 *qp = qops;
172 rc = 0;
173out:
174 write_unlock(&qdisc_mod_lock);
175 return rc;
68fd26b5
JP
176
177out_einval:
178 rc = -EINVAL;
179 goto out;
1da177e4 180}
62e3ba1b 181EXPORT_SYMBOL(register_qdisc);
1da177e4
LT
182
183int unregister_qdisc(struct Qdisc_ops *qops)
184{
185 struct Qdisc_ops *q, **qp;
186 int err = -ENOENT;
187
188 write_lock(&qdisc_mod_lock);
cc7ec456 189 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
1da177e4
LT
190 if (q == qops)
191 break;
192 if (q) {
193 *qp = q->next;
194 q->next = NULL;
195 err = 0;
196 }
197 write_unlock(&qdisc_mod_lock);
198 return err;
199}
62e3ba1b 200EXPORT_SYMBOL(unregister_qdisc);
1da177e4 201
6da7c8fc 202/* Get default qdisc if not otherwise specified */
203void qdisc_get_default(char *name, size_t len)
204{
205 read_lock(&qdisc_mod_lock);
206 strlcpy(name, default_qdisc_ops->id, len);
207 read_unlock(&qdisc_mod_lock);
208}
209
210static struct Qdisc_ops *qdisc_lookup_default(const char *name)
211{
212 struct Qdisc_ops *q = NULL;
213
214 for (q = qdisc_base; q; q = q->next) {
215 if (!strcmp(name, q->id)) {
216 if (!try_module_get(q->owner))
217 q = NULL;
218 break;
219 }
220 }
221
222 return q;
223}
224
225/* Set new default qdisc to use */
226int qdisc_set_default(const char *name)
227{
228 const struct Qdisc_ops *ops;
229
230 if (!capable(CAP_NET_ADMIN))
231 return -EPERM;
232
233 write_lock(&qdisc_mod_lock);
234 ops = qdisc_lookup_default(name);
235 if (!ops) {
236 /* Not found, drop lock and try to load module */
237 write_unlock(&qdisc_mod_lock);
238 request_module("sch_%s", name);
239 write_lock(&qdisc_mod_lock);
240
241 ops = qdisc_lookup_default(name);
242 }
243
244 if (ops) {
245 /* Set new default */
246 module_put(default_qdisc_ops->owner);
247 default_qdisc_ops = ops;
248 }
249 write_unlock(&qdisc_mod_lock);
250
251 return ops ? 0 : -ENOENT;
252}
253
1da177e4 254/* We know handle. Find qdisc among all qdisc's attached to device
4eaf3b84
ED
255 * (root qdisc, all its children, children of children etc.)
256 * Note: caller either uses rtnl or rcu_read_lock()
1da177e4
LT
257 */
258
6113b748 259static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
8123b421
DM
260{
261 struct Qdisc *q;
262
69012ae4
JK
263 if (!qdisc_dev(root))
264 return (root->handle == handle ? root : NULL);
265
8123b421
DM
266 if (!(root->flags & TCQ_F_BUILTIN) &&
267 root->handle == handle)
268 return root;
269
59cc1f61 270 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
8123b421
DM
271 if (q->handle == handle)
272 return q;
273 }
274 return NULL;
275}
276
59cc1f61 277void qdisc_hash_add(struct Qdisc *q)
f6e0b239 278{
37314363
ED
279 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
280 struct Qdisc *root = qdisc_dev(q)->qdisc;
e57a784d 281
37314363 282 WARN_ON_ONCE(root == &noop_qdisc);
4eaf3b84 283 ASSERT_RTNL();
59cc1f61 284 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
37314363 285 }
f6e0b239 286}
59cc1f61 287EXPORT_SYMBOL(qdisc_hash_add);
f6e0b239 288
59cc1f61 289void qdisc_hash_del(struct Qdisc *q)
f6e0b239 290{
4eaf3b84
ED
291 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
292 ASSERT_RTNL();
59cc1f61 293 hash_del_rcu(&q->hash);
4eaf3b84 294 }
f6e0b239 295}
59cc1f61 296EXPORT_SYMBOL(qdisc_hash_del);
f6e0b239 297
ead81cc5 298struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
1da177e4 299{
f6e0b239
JP
300 struct Qdisc *q;
301
af356afa
PM
302 q = qdisc_match_from_root(dev->qdisc, handle);
303 if (q)
304 goto out;
f6e0b239 305
24824a09
ED
306 if (dev_ingress_queue(dev))
307 q = qdisc_match_from_root(
308 dev_ingress_queue(dev)->qdisc_sleeping,
309 handle);
f6486d40 310out:
f6e0b239 311 return q;
1da177e4
LT
312}
313
314static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
315{
316 unsigned long cl;
317 struct Qdisc *leaf;
20fea08b 318 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
1da177e4
LT
319
320 if (cops == NULL)
321 return NULL;
322 cl = cops->get(p, classid);
323
324 if (cl == 0)
325 return NULL;
326 leaf = cops->leaf(p, cl);
327 cops->put(p, cl);
328 return leaf;
329}
330
331/* Find queueing discipline by name */
332
1e90474c 333static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
1da177e4
LT
334{
335 struct Qdisc_ops *q = NULL;
336
337 if (kind) {
338 read_lock(&qdisc_mod_lock);
339 for (q = qdisc_base; q; q = q->next) {
1e90474c 340 if (nla_strcmp(kind, q->id) == 0) {
1da177e4
LT
341 if (!try_module_get(q->owner))
342 q = NULL;
343 break;
344 }
345 }
346 read_unlock(&qdisc_mod_lock);
347 }
348 return q;
349}
350
8a8e3d84
JDB
351/* The linklayer settings were not transferred from iproute2 in older
352 * versions, and the rate tables lookup systems have been dropped in
353 * the kernel. To keep backward compatible with older iproute2 tc
354 * utils, we detect the linklayer setting by detecting if the rate
355 * table were modified.
356 *
357 * For linklayer ATM table entries, the rate table will be aligned to
358 * 48 bytes, thus some table entries will contain the same value. The
359 * mpu (min packet unit) is also encoded into the old rate table, thus
360 * starting from the mpu, we find low and high table entries for
361 * mapping this cell. If these entries contain the same value, then
362 * the rate tables have been modified for linklayer ATM.
363 *
364 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
365 * and then roundup to the next cell, calc the table entry one below,
366 * and compare.
367 */
368static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
369{
370 int low = roundup(r->mpu, 48);
371 int high = roundup(low+1, 48);
372 int cell_low = low >> r->cell_log;
373 int cell_high = (high >> r->cell_log) - 1;
374
375 /* rtab is too inaccurate at rates > 100Mbit/s */
376 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
377 pr_debug("TC linklayer: Giving up ATM detection\n");
378 return TC_LINKLAYER_ETHERNET;
379 }
380
381 if ((cell_high > cell_low) && (cell_high < 256)
382 && (rtab[cell_low] == rtab[cell_high])) {
383 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
384 cell_low, cell_high, rtab[cell_high]);
385 return TC_LINKLAYER_ATM;
386 }
387 return TC_LINKLAYER_ETHERNET;
388}
389
1da177e4
LT
390static struct qdisc_rate_table *qdisc_rtab_list;
391
5a7a5555
JHS
392struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
393 struct nlattr *tab)
1da177e4
LT
394{
395 struct qdisc_rate_table *rtab;
396
40edeff6
ED
397 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
398 nla_len(tab) != TC_RTAB_SIZE)
399 return NULL;
400
1da177e4 401 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
40edeff6
ED
402 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
403 !memcmp(&rtab->data, nla_data(tab), 1024)) {
1da177e4
LT
404 rtab->refcnt++;
405 return rtab;
406 }
407 }
408
1da177e4
LT
409 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
410 if (rtab) {
411 rtab->rate = *r;
412 rtab->refcnt = 1;
1e90474c 413 memcpy(rtab->data, nla_data(tab), 1024);
8a8e3d84
JDB
414 if (r->linklayer == TC_LINKLAYER_UNAWARE)
415 r->linklayer = __detect_linklayer(r, rtab->data);
1da177e4
LT
416 rtab->next = qdisc_rtab_list;
417 qdisc_rtab_list = rtab;
418 }
419 return rtab;
420}
62e3ba1b 421EXPORT_SYMBOL(qdisc_get_rtab);
1da177e4
LT
422
423void qdisc_put_rtab(struct qdisc_rate_table *tab)
424{
425 struct qdisc_rate_table *rtab, **rtabp;
426
427 if (!tab || --tab->refcnt)
428 return;
429
cc7ec456
ED
430 for (rtabp = &qdisc_rtab_list;
431 (rtab = *rtabp) != NULL;
432 rtabp = &rtab->next) {
1da177e4
LT
433 if (rtab == tab) {
434 *rtabp = rtab->next;
435 kfree(rtab);
436 return;
437 }
438 }
439}
62e3ba1b 440EXPORT_SYMBOL(qdisc_put_rtab);
1da177e4 441
175f9c1b
JK
442static LIST_HEAD(qdisc_stab_list);
443static DEFINE_SPINLOCK(qdisc_stab_lock);
444
445static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
446 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
447 [TCA_STAB_DATA] = { .type = NLA_BINARY },
448};
449
450static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
451{
452 struct nlattr *tb[TCA_STAB_MAX + 1];
453 struct qdisc_size_table *stab;
454 struct tc_sizespec *s;
455 unsigned int tsize = 0;
456 u16 *tab = NULL;
457 int err;
458
459 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
460 if (err < 0)
461 return ERR_PTR(err);
462 if (!tb[TCA_STAB_BASE])
463 return ERR_PTR(-EINVAL);
464
465 s = nla_data(tb[TCA_STAB_BASE]);
466
467 if (s->tsize > 0) {
468 if (!tb[TCA_STAB_DATA])
469 return ERR_PTR(-EINVAL);
470 tab = nla_data(tb[TCA_STAB_DATA]);
471 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
472 }
473
00093fab 474 if (tsize != s->tsize || (!tab && tsize > 0))
175f9c1b
JK
475 return ERR_PTR(-EINVAL);
476
f3b9605d 477 spin_lock(&qdisc_stab_lock);
175f9c1b
JK
478
479 list_for_each_entry(stab, &qdisc_stab_list, list) {
480 if (memcmp(&stab->szopts, s, sizeof(*s)))
481 continue;
482 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
483 continue;
484 stab->refcnt++;
f3b9605d 485 spin_unlock(&qdisc_stab_lock);
175f9c1b
JK
486 return stab;
487 }
488
f3b9605d 489 spin_unlock(&qdisc_stab_lock);
175f9c1b
JK
490
491 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
492 if (!stab)
493 return ERR_PTR(-ENOMEM);
494
495 stab->refcnt = 1;
496 stab->szopts = *s;
497 if (tsize > 0)
498 memcpy(stab->data, tab, tsize * sizeof(u16));
499
f3b9605d 500 spin_lock(&qdisc_stab_lock);
175f9c1b 501 list_add_tail(&stab->list, &qdisc_stab_list);
f3b9605d 502 spin_unlock(&qdisc_stab_lock);
175f9c1b
JK
503
504 return stab;
505}
506
a2da570d
ED
507static void stab_kfree_rcu(struct rcu_head *head)
508{
509 kfree(container_of(head, struct qdisc_size_table, rcu));
510}
511
175f9c1b
JK
512void qdisc_put_stab(struct qdisc_size_table *tab)
513{
514 if (!tab)
515 return;
516
f3b9605d 517 spin_lock(&qdisc_stab_lock);
175f9c1b
JK
518
519 if (--tab->refcnt == 0) {
520 list_del(&tab->list);
a2da570d 521 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
175f9c1b
JK
522 }
523
f3b9605d 524 spin_unlock(&qdisc_stab_lock);
175f9c1b
JK
525}
526EXPORT_SYMBOL(qdisc_put_stab);
527
528static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
529{
530 struct nlattr *nest;
531
532 nest = nla_nest_start(skb, TCA_STAB);
3aa4614d
PM
533 if (nest == NULL)
534 goto nla_put_failure;
1b34ec43
DM
535 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
536 goto nla_put_failure;
175f9c1b
JK
537 nla_nest_end(skb, nest);
538
539 return skb->len;
540
541nla_put_failure:
542 return -1;
543}
544
5a7a5555
JHS
545void __qdisc_calculate_pkt_len(struct sk_buff *skb,
546 const struct qdisc_size_table *stab)
175f9c1b
JK
547{
548 int pkt_len, slot;
549
550 pkt_len = skb->len + stab->szopts.overhead;
551 if (unlikely(!stab->szopts.tsize))
552 goto out;
553
554 slot = pkt_len + stab->szopts.cell_align;
555 if (unlikely(slot < 0))
556 slot = 0;
557
558 slot >>= stab->szopts.cell_log;
559 if (likely(slot < stab->szopts.tsize))
560 pkt_len = stab->data[slot];
561 else
562 pkt_len = stab->data[stab->szopts.tsize - 1] *
563 (slot / stab->szopts.tsize) +
564 stab->data[slot % stab->szopts.tsize];
565
566 pkt_len <<= stab->szopts.size_log;
567out:
568 if (unlikely(pkt_len < 1))
569 pkt_len = 1;
570 qdisc_skb_cb(skb)->pkt_len = pkt_len;
571}
a2da570d 572EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
175f9c1b 573
6e765a00 574void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
b00355db
JP
575{
576 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
cc7ec456
ED
577 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
578 txt, qdisc->ops->id, qdisc->handle >> 16);
b00355db
JP
579 qdisc->flags |= TCQ_F_WARN_NONWC;
580 }
581}
582EXPORT_SYMBOL(qdisc_warn_nonwc);
583
4179477f
PM
584static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
585{
586 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
2fbd3da3 587 timer);
4179477f 588
1e203c1a 589 rcu_read_lock();
8608db03 590 __netif_schedule(qdisc_root(wd->qdisc));
1e203c1a 591 rcu_read_unlock();
1936502d 592
4179477f
PM
593 return HRTIMER_NORESTART;
594}
595
596void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
597{
4a8e320c 598 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
2fbd3da3 599 wd->timer.function = qdisc_watchdog;
4179477f
PM
600 wd->qdisc = qdisc;
601}
602EXPORT_SYMBOL(qdisc_watchdog_init);
603
45f50bed 604void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
4179477f 605{
2540e051
JP
606 if (test_bit(__QDISC_STATE_DEACTIVATED,
607 &qdisc_root_sleeping(wd->qdisc)->state))
608 return;
609
a9efad8b
ED
610 if (wd->last_expires == expires)
611 return;
612
613 wd->last_expires = expires;
46baac38 614 hrtimer_start(&wd->timer,
34c5d292 615 ns_to_ktime(expires),
4a8e320c 616 HRTIMER_MODE_ABS_PINNED);
4179477f 617}
34c5d292 618EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
4179477f
PM
619
620void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
621{
2fbd3da3 622 hrtimer_cancel(&wd->timer);
4179477f
PM
623}
624EXPORT_SYMBOL(qdisc_watchdog_cancel);
1da177e4 625
a94f779f 626static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
6fe1c7a5
PM
627{
628 unsigned int size = n * sizeof(struct hlist_head), i;
629 struct hlist_head *h;
630
631 if (size <= PAGE_SIZE)
632 h = kmalloc(size, GFP_KERNEL);
633 else
634 h = (struct hlist_head *)
635 __get_free_pages(GFP_KERNEL, get_order(size));
636
637 if (h != NULL) {
638 for (i = 0; i < n; i++)
639 INIT_HLIST_HEAD(&h[i]);
640 }
641 return h;
642}
643
644static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
645{
646 unsigned int size = n * sizeof(struct hlist_head);
647
648 if (size <= PAGE_SIZE)
649 kfree(h);
650 else
651 free_pages((unsigned long)h, get_order(size));
652}
653
654void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
655{
656 struct Qdisc_class_common *cl;
b67bfe0d 657 struct hlist_node *next;
6fe1c7a5
PM
658 struct hlist_head *nhash, *ohash;
659 unsigned int nsize, nmask, osize;
660 unsigned int i, h;
661
662 /* Rehash when load factor exceeds 0.75 */
663 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
664 return;
665 nsize = clhash->hashsize * 2;
666 nmask = nsize - 1;
667 nhash = qdisc_class_hash_alloc(nsize);
668 if (nhash == NULL)
669 return;
670
671 ohash = clhash->hash;
672 osize = clhash->hashsize;
673
674 sch_tree_lock(sch);
675 for (i = 0; i < osize; i++) {
b67bfe0d 676 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
6fe1c7a5
PM
677 h = qdisc_class_hash(cl->classid, nmask);
678 hlist_add_head(&cl->hnode, &nhash[h]);
679 }
680 }
681 clhash->hash = nhash;
682 clhash->hashsize = nsize;
683 clhash->hashmask = nmask;
684 sch_tree_unlock(sch);
685
686 qdisc_class_hash_free(ohash, osize);
687}
688EXPORT_SYMBOL(qdisc_class_hash_grow);
689
690int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
691{
692 unsigned int size = 4;
693
694 clhash->hash = qdisc_class_hash_alloc(size);
695 if (clhash->hash == NULL)
696 return -ENOMEM;
697 clhash->hashsize = size;
698 clhash->hashmask = size - 1;
699 clhash->hashelems = 0;
700 return 0;
701}
702EXPORT_SYMBOL(qdisc_class_hash_init);
703
704void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
705{
706 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
707}
708EXPORT_SYMBOL(qdisc_class_hash_destroy);
709
710void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
711 struct Qdisc_class_common *cl)
712{
713 unsigned int h;
714
715 INIT_HLIST_NODE(&cl->hnode);
716 h = qdisc_class_hash(cl->classid, clhash->hashmask);
717 hlist_add_head(&cl->hnode, &clhash->hash[h]);
718 clhash->hashelems++;
719}
720EXPORT_SYMBOL(qdisc_class_hash_insert);
721
722void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
723 struct Qdisc_class_common *cl)
724{
725 hlist_del(&cl->hnode);
726 clhash->hashelems--;
727}
728EXPORT_SYMBOL(qdisc_class_hash_remove);
729
fa0f5aa7
ED
730/* Allocate an unique handle from space managed by kernel
731 * Possible range is [8000-FFFF]:0000 (0x8000 values)
732 */
1da177e4
LT
733static u32 qdisc_alloc_handle(struct net_device *dev)
734{
fa0f5aa7 735 int i = 0x8000;
1da177e4
LT
736 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
737
738 do {
739 autohandle += TC_H_MAKE(0x10000U, 0);
740 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
741 autohandle = TC_H_MAKE(0x80000000U, 0);
fa0f5aa7
ED
742 if (!qdisc_lookup(dev, autohandle))
743 return autohandle;
744 cond_resched();
745 } while (--i > 0);
1da177e4 746
fa0f5aa7 747 return 0;
1da177e4
LT
748}
749
2ccccf5f
WC
750void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
751 unsigned int len)
43effa1e 752{
20fea08b 753 const struct Qdisc_class_ops *cops;
43effa1e
PM
754 unsigned long cl;
755 u32 parentid;
2c8c8e6f 756 int drops;
43effa1e 757
2ccccf5f 758 if (n == 0 && len == 0)
43effa1e 759 return;
2c8c8e6f 760 drops = max_t(int, n, 0);
4eaf3b84 761 rcu_read_lock();
43effa1e 762 while ((parentid = sch->parent)) {
066a3b5b 763 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
4eaf3b84 764 break;
066a3b5b 765
4eaf3b84
ED
766 if (sch->flags & TCQ_F_NOPARENT)
767 break;
768 /* TODO: perform the search on a per txq basis */
5ce2d488 769 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
ffc8fefa 770 if (sch == NULL) {
4eaf3b84
ED
771 WARN_ON_ONCE(parentid != TC_H_ROOT);
772 break;
ffc8fefa 773 }
43effa1e
PM
774 cops = sch->ops->cl_ops;
775 if (cops->qlen_notify) {
776 cl = cops->get(sch, parentid);
777 cops->qlen_notify(sch, cl);
778 cops->put(sch, cl);
779 }
780 sch->q.qlen -= n;
2ccccf5f 781 sch->qstats.backlog -= len;
25331d6c 782 __qdisc_qstats_drop(sch, drops);
43effa1e 783 }
4eaf3b84 784 rcu_read_unlock();
43effa1e 785}
2ccccf5f 786EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
1da177e4 787
7316ae88
TG
788static void notify_and_destroy(struct net *net, struct sk_buff *skb,
789 struct nlmsghdr *n, u32 clid,
99194cff
DM
790 struct Qdisc *old, struct Qdisc *new)
791{
792 if (new || old)
7316ae88 793 qdisc_notify(net, skb, n, clid, old, new);
1da177e4 794
4d8863a2 795 if (old)
99194cff 796 qdisc_destroy(old);
99194cff
DM
797}
798
799/* Graft qdisc "new" to class "classid" of qdisc "parent" or
800 * to device "dev".
801 *
802 * When appropriate send a netlink notification using 'skb'
803 * and "n".
804 *
805 * On success, destroy old qdisc.
1da177e4
LT
806 */
807
808static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
99194cff
DM
809 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
810 struct Qdisc *new, struct Qdisc *old)
1da177e4 811{
99194cff 812 struct Qdisc *q = old;
7316ae88 813 struct net *net = dev_net(dev);
1da177e4 814 int err = 0;
1da177e4 815
10297b99 816 if (parent == NULL) {
99194cff
DM
817 unsigned int i, num_q, ingress;
818
819 ingress = 0;
820 num_q = dev->num_tx_queues;
8d50b53d
DM
821 if ((q && q->flags & TCQ_F_INGRESS) ||
822 (new && new->flags & TCQ_F_INGRESS)) {
99194cff
DM
823 num_q = 1;
824 ingress = 1;
24824a09
ED
825 if (!dev_ingress_queue(dev))
826 return -ENOENT;
99194cff
DM
827 }
828
829 if (dev->flags & IFF_UP)
830 dev_deactivate(dev);
831
86e363dc
WC
832 if (new && new->ops->attach)
833 goto skip;
6ec1c69a 834
99194cff 835 for (i = 0; i < num_q; i++) {
24824a09 836 struct netdev_queue *dev_queue = dev_ingress_queue(dev);
99194cff
DM
837
838 if (!ingress)
839 dev_queue = netdev_get_tx_queue(dev, i);
840
8d50b53d
DM
841 old = dev_graft_qdisc(dev_queue, new);
842 if (new && i > 0)
843 atomic_inc(&new->refcnt);
844
036d6a67
JP
845 if (!ingress)
846 qdisc_destroy(old);
1da177e4 847 }
99194cff 848
86e363dc 849skip:
036d6a67 850 if (!ingress) {
7316ae88
TG
851 notify_and_destroy(net, skb, n, classid,
852 dev->qdisc, new);
036d6a67
JP
853 if (new && !new->ops->attach)
854 atomic_inc(&new->refcnt);
855 dev->qdisc = new ? : &noop_qdisc;
86e363dc
WC
856
857 if (new && new->ops->attach)
858 new->ops->attach(new);
036d6a67 859 } else {
7316ae88 860 notify_and_destroy(net, skb, n, classid, old, new);
036d6a67 861 }
af356afa 862
99194cff
DM
863 if (dev->flags & IFF_UP)
864 dev_activate(dev);
1da177e4 865 } else {
20fea08b 866 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1da177e4 867
c9f1d038
PM
868 err = -EOPNOTSUPP;
869 if (cops && cops->graft) {
1da177e4
LT
870 unsigned long cl = cops->get(parent, classid);
871 if (cl) {
99194cff 872 err = cops->graft(parent, cl, new, &old);
1da177e4 873 cops->put(parent, cl);
c9f1d038
PM
874 } else
875 err = -ENOENT;
1da177e4 876 }
99194cff 877 if (!err)
7316ae88 878 notify_and_destroy(net, skb, n, classid, old, new);
1da177e4
LT
879 }
880 return err;
881}
882
25bfcd5a
JP
883/* lockdep annotation is needed for ingress; egress gets it only for name */
884static struct lock_class_key qdisc_tx_lock;
885static struct lock_class_key qdisc_rx_lock;
886
1da177e4
LT
887/*
888 Allocate and initialize new qdisc.
889
890 Parameters are passed via opt.
891 */
892
5a7a5555
JHS
893static struct Qdisc *qdisc_create(struct net_device *dev,
894 struct netdev_queue *dev_queue,
895 struct Qdisc *p, u32 parent, u32 handle,
896 struct nlattr **tca, int *errp)
1da177e4
LT
897{
898 int err;
1e90474c 899 struct nlattr *kind = tca[TCA_KIND];
1da177e4
LT
900 struct Qdisc *sch;
901 struct Qdisc_ops *ops;
175f9c1b 902 struct qdisc_size_table *stab;
1da177e4
LT
903
904 ops = qdisc_lookup_ops(kind);
95a5afca 905#ifdef CONFIG_MODULES
1da177e4
LT
906 if (ops == NULL && kind != NULL) {
907 char name[IFNAMSIZ];
1e90474c 908 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1da177e4
LT
909 /* We dropped the RTNL semaphore in order to
910 * perform the module load. So, even if we
911 * succeeded in loading the module we have to
912 * tell the caller to replay the request. We
913 * indicate this using -EAGAIN.
914 * We replay the request because the device may
915 * go away in the mean time.
916 */
917 rtnl_unlock();
918 request_module("sch_%s", name);
919 rtnl_lock();
920 ops = qdisc_lookup_ops(kind);
921 if (ops != NULL) {
922 /* We will try again qdisc_lookup_ops,
923 * so don't keep a reference.
924 */
925 module_put(ops->owner);
926 err = -EAGAIN;
927 goto err_out;
928 }
929 }
930 }
931#endif
932
b9e2cc0f 933 err = -ENOENT;
1da177e4
LT
934 if (ops == NULL)
935 goto err_out;
936
5ce2d488 937 sch = qdisc_alloc(dev_queue, ops);
3d54b82f
TG
938 if (IS_ERR(sch)) {
939 err = PTR_ERR(sch);
1da177e4 940 goto err_out2;
3d54b82f 941 }
1da177e4 942
ffc8fefa
PM
943 sch->parent = parent;
944
3d54b82f 945 if (handle == TC_H_INGRESS) {
1da177e4 946 sch->flags |= TCQ_F_INGRESS;
3d54b82f 947 handle = TC_H_MAKE(TC_H_INGRESS, 0);
25bfcd5a 948 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
fd44de7c 949 } else {
fd44de7c
PM
950 if (handle == 0) {
951 handle = qdisc_alloc_handle(dev);
952 err = -ENOMEM;
953 if (handle == 0)
954 goto err_out3;
955 }
25bfcd5a 956 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
1abbe139 957 if (!netif_is_multiqueue(dev))
225734de 958 sch->flags |= TCQ_F_ONETXQUEUE;
1da177e4
LT
959 }
960
3d54b82f 961 sch->handle = handle;
1da177e4 962
1e90474c 963 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
22e0f8b9
JF
964 if (qdisc_is_percpu_stats(sch)) {
965 sch->cpu_bstats =
7c1c97d5 966 netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
22e0f8b9
JF
967 if (!sch->cpu_bstats)
968 goto err_out4;
b0ab6f92
JF
969
970 sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
971 if (!sch->cpu_qstats)
972 goto err_out4;
22e0f8b9
JF
973 }
974
175f9c1b
JK
975 if (tca[TCA_STAB]) {
976 stab = qdisc_get_stab(tca[TCA_STAB]);
977 if (IS_ERR(stab)) {
978 err = PTR_ERR(stab);
7c64b9f3 979 goto err_out4;
175f9c1b 980 }
a2da570d 981 rcu_assign_pointer(sch->stab, stab);
175f9c1b 982 }
1e90474c 983 if (tca[TCA_RATE]) {
edb09eb1 984 seqcount_t *running;
f6f9b93f 985
23bcf634
PM
986 err = -EOPNOTSUPP;
987 if (sch->flags & TCQ_F_MQROOT)
988 goto err_out4;
989
f6f9b93f 990 if ((sch->parent != TC_H_ROOT) &&
23bcf634
PM
991 !(sch->flags & TCQ_F_INGRESS) &&
992 (!p || !(p->flags & TCQ_F_MQROOT)))
edb09eb1 993 running = qdisc_root_sleeping_running(sch);
f6f9b93f 994 else
edb09eb1 995 running = &sch->running;
f6f9b93f 996
22e0f8b9
JF
997 err = gen_new_estimator(&sch->bstats,
998 sch->cpu_bstats,
999 &sch->rate_est,
edb09eb1
ED
1000 NULL,
1001 running,
22e0f8b9 1002 tca[TCA_RATE]);
23bcf634
PM
1003 if (err)
1004 goto err_out4;
023e09a7 1005 }
f6e0b239 1006
59cc1f61 1007 qdisc_hash_add(sch);
1da177e4 1008
1da177e4
LT
1009 return sch;
1010 }
1011err_out3:
1012 dev_put(dev);
3d54b82f 1013 kfree((char *) sch - sch->padded);
1da177e4
LT
1014err_out2:
1015 module_put(ops->owner);
1016err_out:
1017 *errp = err;
1da177e4 1018 return NULL;
23bcf634
PM
1019
1020err_out4:
22e0f8b9 1021 free_percpu(sch->cpu_bstats);
b0ab6f92 1022 free_percpu(sch->cpu_qstats);
23bcf634
PM
1023 /*
1024 * Any broken qdiscs that would require a ops->reset() here?
1025 * The qdisc was never in action so it shouldn't be necessary.
1026 */
a2da570d 1027 qdisc_put_stab(rtnl_dereference(sch->stab));
23bcf634
PM
1028 if (ops->destroy)
1029 ops->destroy(sch);
1030 goto err_out3;
1da177e4
LT
1031}
1032
1e90474c 1033static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1da177e4 1034{
a2da570d 1035 struct qdisc_size_table *ostab, *stab = NULL;
175f9c1b 1036 int err = 0;
1da177e4 1037
175f9c1b 1038 if (tca[TCA_OPTIONS]) {
1da177e4
LT
1039 if (sch->ops->change == NULL)
1040 return -EINVAL;
1e90474c 1041 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1da177e4
LT
1042 if (err)
1043 return err;
1044 }
175f9c1b
JK
1045
1046 if (tca[TCA_STAB]) {
1047 stab = qdisc_get_stab(tca[TCA_STAB]);
1048 if (IS_ERR(stab))
1049 return PTR_ERR(stab);
1050 }
1051
a2da570d
ED
1052 ostab = rtnl_dereference(sch->stab);
1053 rcu_assign_pointer(sch->stab, stab);
1054 qdisc_put_stab(ostab);
175f9c1b 1055
23bcf634 1056 if (tca[TCA_RATE]) {
71bcb09a
SH
1057 /* NB: ignores errors from replace_estimator
1058 because change can't be undone. */
23bcf634
PM
1059 if (sch->flags & TCQ_F_MQROOT)
1060 goto out;
22e0f8b9
JF
1061 gen_replace_estimator(&sch->bstats,
1062 sch->cpu_bstats,
1063 &sch->rate_est,
edb09eb1
ED
1064 NULL,
1065 qdisc_root_sleeping_running(sch),
22e0f8b9 1066 tca[TCA_RATE]);
23bcf634
PM
1067 }
1068out:
1da177e4
LT
1069 return 0;
1070}
1071
cc7ec456
ED
1072struct check_loop_arg {
1073 struct qdisc_walker w;
1da177e4
LT
1074 struct Qdisc *p;
1075 int depth;
1076};
1077
5a7a5555
JHS
1078static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1079 struct qdisc_walker *w);
1da177e4
LT
1080
1081static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1082{
1083 struct check_loop_arg arg;
1084
1085 if (q->ops->cl_ops == NULL)
1086 return 0;
1087
1088 arg.w.stop = arg.w.skip = arg.w.count = 0;
1089 arg.w.fn = check_loop_fn;
1090 arg.depth = depth;
1091 arg.p = p;
1092 q->ops->cl_ops->walk(q, &arg.w);
1093 return arg.w.stop ? -ELOOP : 0;
1094}
1095
1096static int
1097check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1098{
1099 struct Qdisc *leaf;
20fea08b 1100 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1da177e4
LT
1101 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1102
1103 leaf = cops->leaf(q, cl);
1104 if (leaf) {
1105 if (leaf == arg->p || arg->depth > 7)
1106 return -ELOOP;
1107 return check_loop(leaf, arg->p, arg->depth + 1);
1108 }
1109 return 0;
1110}
1111
1112/*
1113 * Delete/get qdisc.
1114 */
1115
661d2967 1116static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1da177e4 1117{
3b1e0a65 1118 struct net *net = sock_net(skb->sk);
02ef22ca 1119 struct tcmsg *tcm = nlmsg_data(n);
1e90474c 1120 struct nlattr *tca[TCA_MAX + 1];
1da177e4 1121 struct net_device *dev;
de179c8c 1122 u32 clid;
1da177e4
LT
1123 struct Qdisc *q = NULL;
1124 struct Qdisc *p = NULL;
1125 int err;
1126
4e8bbb81 1127 if ((n->nlmsg_type != RTM_GETQDISC) &&
5f013c9b 1128 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef8
EB
1129 return -EPERM;
1130
1e90474c
PM
1131 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1132 if (err < 0)
1133 return err;
1134
de179c8c
H
1135 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1136 if (!dev)
1137 return -ENODEV;
1138
1139 clid = tcm->tcm_parent;
1da177e4
LT
1140 if (clid) {
1141 if (clid != TC_H_ROOT) {
1142 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
cc7ec456
ED
1143 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1144 if (!p)
1da177e4
LT
1145 return -ENOENT;
1146 q = qdisc_leaf(p, clid);
cc7ec456
ED
1147 } else if (dev_ingress_queue(dev)) {
1148 q = dev_ingress_queue(dev)->qdisc_sleeping;
10297b99 1149 }
1da177e4 1150 } else {
af356afa 1151 q = dev->qdisc;
1da177e4
LT
1152 }
1153 if (!q)
1154 return -ENOENT;
1155
1156 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1157 return -EINVAL;
1158 } else {
cc7ec456
ED
1159 q = qdisc_lookup(dev, tcm->tcm_handle);
1160 if (!q)
1da177e4
LT
1161 return -ENOENT;
1162 }
1163
1e90474c 1164 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
1165 return -EINVAL;
1166
1167 if (n->nlmsg_type == RTM_DELQDISC) {
1168 if (!clid)
1169 return -EINVAL;
1170 if (q->handle == 0)
1171 return -ENOENT;
cc7ec456
ED
1172 err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1173 if (err != 0)
1da177e4 1174 return err;
1da177e4 1175 } else {
7316ae88 1176 qdisc_notify(net, skb, n, clid, NULL, q);
1da177e4
LT
1177 }
1178 return 0;
1179}
1180
1181/*
cc7ec456 1182 * Create/change qdisc.
1da177e4
LT
1183 */
1184
661d2967 1185static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1da177e4 1186{
3b1e0a65 1187 struct net *net = sock_net(skb->sk);
1da177e4 1188 struct tcmsg *tcm;
1e90474c 1189 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1190 struct net_device *dev;
1191 u32 clid;
1192 struct Qdisc *q, *p;
1193 int err;
1194
5f013c9b 1195 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef8
EB
1196 return -EPERM;
1197
1da177e4
LT
1198replay:
1199 /* Reinit, just in case something touches this. */
de179c8c
H
1200 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1201 if (err < 0)
1202 return err;
1203
02ef22ca 1204 tcm = nlmsg_data(n);
1da177e4
LT
1205 clid = tcm->tcm_parent;
1206 q = p = NULL;
1207
cc7ec456
ED
1208 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1209 if (!dev)
1da177e4
LT
1210 return -ENODEV;
1211
1e90474c 1212
1da177e4
LT
1213 if (clid) {
1214 if (clid != TC_H_ROOT) {
1215 if (clid != TC_H_INGRESS) {
cc7ec456
ED
1216 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1217 if (!p)
1da177e4
LT
1218 return -ENOENT;
1219 q = qdisc_leaf(p, clid);
cc7ec456
ED
1220 } else if (dev_ingress_queue_create(dev)) {
1221 q = dev_ingress_queue(dev)->qdisc_sleeping;
1da177e4
LT
1222 }
1223 } else {
af356afa 1224 q = dev->qdisc;
1da177e4
LT
1225 }
1226
1227 /* It may be default qdisc, ignore it */
1228 if (q && q->handle == 0)
1229 q = NULL;
1230
1231 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1232 if (tcm->tcm_handle) {
cc7ec456 1233 if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1da177e4
LT
1234 return -EEXIST;
1235 if (TC_H_MIN(tcm->tcm_handle))
1236 return -EINVAL;
cc7ec456
ED
1237 q = qdisc_lookup(dev, tcm->tcm_handle);
1238 if (!q)
1da177e4 1239 goto create_n_graft;
cc7ec456 1240 if (n->nlmsg_flags & NLM_F_EXCL)
1da177e4 1241 return -EEXIST;
1e90474c 1242 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
1243 return -EINVAL;
1244 if (q == p ||
1245 (p && check_loop(q, p, 0)))
1246 return -ELOOP;
1247 atomic_inc(&q->refcnt);
1248 goto graft;
1249 } else {
cc7ec456 1250 if (!q)
1da177e4
LT
1251 goto create_n_graft;
1252
1253 /* This magic test requires explanation.
1254 *
1255 * We know, that some child q is already
1256 * attached to this parent and have choice:
1257 * either to change it or to create/graft new one.
1258 *
1259 * 1. We are allowed to create/graft only
1260 * if CREATE and REPLACE flags are set.
1261 *
1262 * 2. If EXCL is set, requestor wanted to say,
1263 * that qdisc tcm_handle is not expected
1264 * to exist, so that we choose create/graft too.
1265 *
1266 * 3. The last case is when no flags are set.
1267 * Alas, it is sort of hole in API, we
1268 * cannot decide what to do unambiguously.
1269 * For now we select create/graft, if
1270 * user gave KIND, which does not match existing.
1271 */
cc7ec456
ED
1272 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1273 (n->nlmsg_flags & NLM_F_REPLACE) &&
1274 ((n->nlmsg_flags & NLM_F_EXCL) ||
1e90474c
PM
1275 (tca[TCA_KIND] &&
1276 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1da177e4
LT
1277 goto create_n_graft;
1278 }
1279 }
1280 } else {
1281 if (!tcm->tcm_handle)
1282 return -EINVAL;
1283 q = qdisc_lookup(dev, tcm->tcm_handle);
1284 }
1285
1286 /* Change qdisc parameters */
1287 if (q == NULL)
1288 return -ENOENT;
cc7ec456 1289 if (n->nlmsg_flags & NLM_F_EXCL)
1da177e4 1290 return -EEXIST;
1e90474c 1291 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
1292 return -EINVAL;
1293 err = qdisc_change(q, tca);
1294 if (err == 0)
7316ae88 1295 qdisc_notify(net, skb, n, clid, NULL, q);
1da177e4
LT
1296 return err;
1297
1298create_n_graft:
cc7ec456 1299 if (!(n->nlmsg_flags & NLM_F_CREATE))
1da177e4 1300 return -ENOENT;
24824a09
ED
1301 if (clid == TC_H_INGRESS) {
1302 if (dev_ingress_queue(dev))
1303 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1304 tcm->tcm_parent, tcm->tcm_parent,
1305 tca, &err);
1306 else
1307 err = -ENOENT;
1308 } else {
926e61b7 1309 struct netdev_queue *dev_queue;
6ec1c69a
DM
1310
1311 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
926e61b7
JP
1312 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1313 else if (p)
1314 dev_queue = p->dev_queue;
1315 else
1316 dev_queue = netdev_get_tx_queue(dev, 0);
6ec1c69a 1317
926e61b7 1318 q = qdisc_create(dev, dev_queue, p,
bb949fbd 1319 tcm->tcm_parent, tcm->tcm_handle,
ffc8fefa 1320 tca, &err);
6ec1c69a 1321 }
1da177e4
LT
1322 if (q == NULL) {
1323 if (err == -EAGAIN)
1324 goto replay;
1325 return err;
1326 }
1327
1328graft:
e5befbd9
IJ
1329 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1330 if (err) {
1331 if (q)
1332 qdisc_destroy(q);
1333 return err;
1da177e4 1334 }
e5befbd9 1335
1da177e4
LT
1336 return 0;
1337}
1338
1339static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
15e47304 1340 u32 portid, u32 seq, u16 flags, int event)
1da177e4 1341{
22e0f8b9 1342 struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
b0ab6f92 1343 struct gnet_stats_queue __percpu *cpu_qstats = NULL;
1da177e4
LT
1344 struct tcmsg *tcm;
1345 struct nlmsghdr *nlh;
27a884dc 1346 unsigned char *b = skb_tail_pointer(skb);
1da177e4 1347 struct gnet_dump d;
a2da570d 1348 struct qdisc_size_table *stab;
64015853 1349 __u32 qlen;
1da177e4 1350
fba373d2 1351 cond_resched();
15e47304 1352 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
02ef22ca
DM
1353 if (!nlh)
1354 goto out_nlmsg_trim;
1355 tcm = nlmsg_data(nlh);
1da177e4 1356 tcm->tcm_family = AF_UNSPEC;
9ef1d4c7
PM
1357 tcm->tcm__pad1 = 0;
1358 tcm->tcm__pad2 = 0;
5ce2d488 1359 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1da177e4
LT
1360 tcm->tcm_parent = clid;
1361 tcm->tcm_handle = q->handle;
1362 tcm->tcm_info = atomic_read(&q->refcnt);
1b34ec43
DM
1363 if (nla_put_string(skb, TCA_KIND, q->ops->id))
1364 goto nla_put_failure;
1da177e4 1365 if (q->ops->dump && q->ops->dump(q, skb) < 0)
1e90474c 1366 goto nla_put_failure;
64015853 1367 qlen = q->q.qlen;
1da177e4 1368
a2da570d
ED
1369 stab = rtnl_dereference(q->stab);
1370 if (stab && qdisc_dump_stab(skb, stab) < 0)
175f9c1b
JK
1371 goto nla_put_failure;
1372
102396ae 1373 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
edb09eb1 1374 NULL, &d, TCA_PAD) < 0)
1e90474c 1375 goto nla_put_failure;
1da177e4
LT
1376
1377 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
1e90474c 1378 goto nla_put_failure;
1da177e4 1379
b0ab6f92 1380 if (qdisc_is_percpu_stats(q)) {
22e0f8b9 1381 cpu_bstats = q->cpu_bstats;
b0ab6f92
JF
1382 cpu_qstats = q->cpu_qstats;
1383 }
22e0f8b9 1384
edb09eb1
ED
1385 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
1386 &d, cpu_bstats, &q->bstats) < 0 ||
d250a5f9 1387 gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
b0ab6f92 1388 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
1e90474c 1389 goto nla_put_failure;
10297b99 1390
1da177e4 1391 if (gnet_stats_finish_copy(&d) < 0)
1e90474c 1392 goto nla_put_failure;
10297b99 1393
27a884dc 1394 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1da177e4
LT
1395 return skb->len;
1396
02ef22ca 1397out_nlmsg_trim:
1e90474c 1398nla_put_failure:
dc5fc579 1399 nlmsg_trim(skb, b);
1da177e4
LT
1400 return -1;
1401}
1402
53b0f080
ED
1403static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1404{
1405 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1406}
1407
7316ae88
TG
1408static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1409 struct nlmsghdr *n, u32 clid,
1410 struct Qdisc *old, struct Qdisc *new)
1da177e4
LT
1411{
1412 struct sk_buff *skb;
15e47304 1413 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1da177e4
LT
1414
1415 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1416 if (!skb)
1417 return -ENOBUFS;
1418
53b0f080 1419 if (old && !tc_qdisc_dump_ignore(old)) {
15e47304 1420 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
cc7ec456 1421 0, RTM_DELQDISC) < 0)
1da177e4
LT
1422 goto err_out;
1423 }
53b0f080 1424 if (new && !tc_qdisc_dump_ignore(new)) {
15e47304 1425 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
cc7ec456 1426 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1da177e4
LT
1427 goto err_out;
1428 }
1429
1430 if (skb->len)
15e47304 1431 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
cc7ec456 1432 n->nlmsg_flags & NLM_F_ECHO);
1da177e4
LT
1433
1434err_out:
1435 kfree_skb(skb);
1436 return -EINVAL;
1437}
1438
30723673
DM
1439static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1440 struct netlink_callback *cb,
ea327469 1441 int *q_idx_p, int s_q_idx, bool recur)
30723673
DM
1442{
1443 int ret = 0, q_idx = *q_idx_p;
1444 struct Qdisc *q;
59cc1f61 1445 int b;
30723673
DM
1446
1447 if (!root)
1448 return 0;
1449
1450 q = root;
1451 if (q_idx < s_q_idx) {
1452 q_idx++;
1453 } else {
1454 if (!tc_qdisc_dump_ignore(q) &&
15e47304 1455 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
5a7a5555
JHS
1456 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1457 RTM_NEWQDISC) <= 0)
30723673
DM
1458 goto done;
1459 q_idx++;
1460 }
69012ae4 1461
ea327469
JK
1462 /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1463 * itself has already been dumped.
1464 *
1465 * If we've already dumped the top-level (ingress) qdisc above and the global
1466 * qdisc hashtable, we don't want to hit it again
1467 */
1468 if (!qdisc_dev(root) || !recur)
69012ae4
JK
1469 goto out;
1470
59cc1f61 1471 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
30723673
DM
1472 if (q_idx < s_q_idx) {
1473 q_idx++;
1474 continue;
1475 }
cc7ec456 1476 if (!tc_qdisc_dump_ignore(q) &&
15e47304 1477 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
5a7a5555
JHS
1478 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1479 RTM_NEWQDISC) <= 0)
30723673
DM
1480 goto done;
1481 q_idx++;
1482 }
1483
1484out:
1485 *q_idx_p = q_idx;
1486 return ret;
1487done:
1488 ret = -1;
1489 goto out;
1490}
1491
1da177e4
LT
1492static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1493{
3b1e0a65 1494 struct net *net = sock_net(skb->sk);
1da177e4
LT
1495 int idx, q_idx;
1496 int s_idx, s_q_idx;
1497 struct net_device *dev;
1da177e4
LT
1498
1499 s_idx = cb->args[0];
1500 s_q_idx = q_idx = cb->args[1];
f1e9016d 1501
7562f876 1502 idx = 0;
15dc36eb
ED
1503 ASSERT_RTNL();
1504 for_each_netdev(net, dev) {
30723673
DM
1505 struct netdev_queue *dev_queue;
1506
1da177e4 1507 if (idx < s_idx)
7562f876 1508 goto cont;
1da177e4
LT
1509 if (idx > s_idx)
1510 s_q_idx = 0;
1da177e4 1511 q_idx = 0;
30723673 1512
5a7a5555
JHS
1513 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1514 true) < 0)
30723673
DM
1515 goto done;
1516
24824a09
ED
1517 dev_queue = dev_ingress_queue(dev);
1518 if (dev_queue &&
1519 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
ea327469 1520 &q_idx, s_q_idx, false) < 0)
30723673
DM
1521 goto done;
1522
7562f876
PE
1523cont:
1524 idx++;
1da177e4
LT
1525 }
1526
1527done:
1da177e4
LT
1528 cb->args[0] = idx;
1529 cb->args[1] = q_idx;
1530
1531 return skb->len;
1532}
1533
1534
1535
1536/************************************************
1537 * Traffic classes manipulation. *
1538 ************************************************/
1539
1540
1541
661d2967 1542static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
1da177e4 1543{
3b1e0a65 1544 struct net *net = sock_net(skb->sk);
02ef22ca 1545 struct tcmsg *tcm = nlmsg_data(n);
1e90474c 1546 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1547 struct net_device *dev;
1548 struct Qdisc *q = NULL;
20fea08b 1549 const struct Qdisc_class_ops *cops;
1da177e4
LT
1550 unsigned long cl = 0;
1551 unsigned long new_cl;
de179c8c
H
1552 u32 portid;
1553 u32 clid;
1554 u32 qid;
1da177e4
LT
1555 int err;
1556
4e8bbb81 1557 if ((n->nlmsg_type != RTM_GETTCLASS) &&
5f013c9b 1558 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef8
EB
1559 return -EPERM;
1560
1e90474c
PM
1561 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1562 if (err < 0)
1563 return err;
1564
de179c8c
H
1565 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1566 if (!dev)
1567 return -ENODEV;
1568
1da177e4
LT
1569 /*
1570 parent == TC_H_UNSPEC - unspecified parent.
1571 parent == TC_H_ROOT - class is root, which has no parent.
1572 parent == X:0 - parent is root class.
1573 parent == X:Y - parent is a node in hierarchy.
1574 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1575
1576 handle == 0:0 - generate handle from kernel pool.
1577 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1578 handle == X:Y - clear.
1579 handle == X:0 - root class.
1580 */
1581
1582 /* Step 1. Determine qdisc handle X:0 */
1583
de179c8c
H
1584 portid = tcm->tcm_parent;
1585 clid = tcm->tcm_handle;
1586 qid = TC_H_MAJ(clid);
1587
15e47304
EB
1588 if (portid != TC_H_ROOT) {
1589 u32 qid1 = TC_H_MAJ(portid);
1da177e4
LT
1590
1591 if (qid && qid1) {
1592 /* If both majors are known, they must be identical. */
1593 if (qid != qid1)
1594 return -EINVAL;
1595 } else if (qid1) {
1596 qid = qid1;
1597 } else if (qid == 0)
af356afa 1598 qid = dev->qdisc->handle;
1da177e4
LT
1599
1600 /* Now qid is genuine qdisc handle consistent
cc7ec456
ED
1601 * both with parent and child.
1602 *
15e47304 1603 * TC_H_MAJ(portid) still may be unspecified, complete it now.
1da177e4 1604 */
15e47304
EB
1605 if (portid)
1606 portid = TC_H_MAKE(qid, portid);
1da177e4
LT
1607 } else {
1608 if (qid == 0)
af356afa 1609 qid = dev->qdisc->handle;
1da177e4
LT
1610 }
1611
1612 /* OK. Locate qdisc */
cc7ec456
ED
1613 q = qdisc_lookup(dev, qid);
1614 if (!q)
1da177e4
LT
1615 return -ENOENT;
1616
1617 /* An check that it supports classes */
1618 cops = q->ops->cl_ops;
1619 if (cops == NULL)
1620 return -EINVAL;
1621
1622 /* Now try to get class */
1623 if (clid == 0) {
15e47304 1624 if (portid == TC_H_ROOT)
1da177e4
LT
1625 clid = qid;
1626 } else
1627 clid = TC_H_MAKE(qid, clid);
1628
1629 if (clid)
1630 cl = cops->get(q, clid);
1631
1632 if (cl == 0) {
1633 err = -ENOENT;
cc7ec456
ED
1634 if (n->nlmsg_type != RTM_NEWTCLASS ||
1635 !(n->nlmsg_flags & NLM_F_CREATE))
1da177e4
LT
1636 goto out;
1637 } else {
1638 switch (n->nlmsg_type) {
10297b99 1639 case RTM_NEWTCLASS:
1da177e4 1640 err = -EEXIST;
cc7ec456 1641 if (n->nlmsg_flags & NLM_F_EXCL)
1da177e4
LT
1642 goto out;
1643 break;
1644 case RTM_DELTCLASS:
de6d5cdf
PM
1645 err = -EOPNOTSUPP;
1646 if (cops->delete)
1647 err = cops->delete(q, cl);
1da177e4 1648 if (err == 0)
5a7a5555
JHS
1649 tclass_notify(net, skb, n, q, cl,
1650 RTM_DELTCLASS);
1da177e4
LT
1651 goto out;
1652 case RTM_GETTCLASS:
7316ae88 1653 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1da177e4
LT
1654 goto out;
1655 default:
1656 err = -EINVAL;
1657 goto out;
1658 }
1659 }
1660
1661 new_cl = cl;
de6d5cdf
PM
1662 err = -EOPNOTSUPP;
1663 if (cops->change)
15e47304 1664 err = cops->change(q, clid, portid, tca, &new_cl);
1da177e4 1665 if (err == 0)
7316ae88 1666 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1da177e4
LT
1667
1668out:
1669 if (cl)
1670 cops->put(q, cl);
1671
1672 return err;
1673}
1674
1675
1676static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1677 unsigned long cl,
15e47304 1678 u32 portid, u32 seq, u16 flags, int event)
1da177e4
LT
1679{
1680 struct tcmsg *tcm;
1681 struct nlmsghdr *nlh;
27a884dc 1682 unsigned char *b = skb_tail_pointer(skb);
1da177e4 1683 struct gnet_dump d;
20fea08b 1684 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1da177e4 1685
fba373d2 1686 cond_resched();
15e47304 1687 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
02ef22ca
DM
1688 if (!nlh)
1689 goto out_nlmsg_trim;
1690 tcm = nlmsg_data(nlh);
1da177e4 1691 tcm->tcm_family = AF_UNSPEC;
16ebb5e0
ED
1692 tcm->tcm__pad1 = 0;
1693 tcm->tcm__pad2 = 0;
5ce2d488 1694 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1da177e4
LT
1695 tcm->tcm_parent = q->handle;
1696 tcm->tcm_handle = q->handle;
1697 tcm->tcm_info = 0;
1b34ec43
DM
1698 if (nla_put_string(skb, TCA_KIND, q->ops->id))
1699 goto nla_put_failure;
1da177e4 1700 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1e90474c 1701 goto nla_put_failure;
1da177e4 1702
102396ae 1703 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
edb09eb1 1704 NULL, &d, TCA_PAD) < 0)
1e90474c 1705 goto nla_put_failure;
1da177e4
LT
1706
1707 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1e90474c 1708 goto nla_put_failure;
1da177e4
LT
1709
1710 if (gnet_stats_finish_copy(&d) < 0)
1e90474c 1711 goto nla_put_failure;
1da177e4 1712
27a884dc 1713 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1da177e4
LT
1714 return skb->len;
1715
02ef22ca 1716out_nlmsg_trim:
1e90474c 1717nla_put_failure:
dc5fc579 1718 nlmsg_trim(skb, b);
1da177e4
LT
1719 return -1;
1720}
1721
7316ae88
TG
1722static int tclass_notify(struct net *net, struct sk_buff *oskb,
1723 struct nlmsghdr *n, struct Qdisc *q,
1724 unsigned long cl, int event)
1da177e4
LT
1725{
1726 struct sk_buff *skb;
15e47304 1727 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1da177e4
LT
1728
1729 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1730 if (!skb)
1731 return -ENOBUFS;
1732
15e47304 1733 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1da177e4
LT
1734 kfree_skb(skb);
1735 return -EINVAL;
1736 }
1737
15e47304 1738 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
cc7ec456 1739 n->nlmsg_flags & NLM_F_ECHO);
1da177e4
LT
1740}
1741
cc7ec456
ED
1742struct qdisc_dump_args {
1743 struct qdisc_walker w;
1744 struct sk_buff *skb;
1745 struct netlink_callback *cb;
1da177e4
LT
1746};
1747
5a7a5555
JHS
1748static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1749 struct qdisc_walker *arg)
1da177e4
LT
1750{
1751 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1752
15e47304 1753 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
5a7a5555
JHS
1754 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1755 RTM_NEWTCLASS);
1da177e4
LT
1756}
1757
30723673
DM
1758static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1759 struct tcmsg *tcm, struct netlink_callback *cb,
1760 int *t_p, int s_t)
1761{
1762 struct qdisc_dump_args arg;
1763
1764 if (tc_qdisc_dump_ignore(q) ||
1765 *t_p < s_t || !q->ops->cl_ops ||
1766 (tcm->tcm_parent &&
1767 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1768 (*t_p)++;
1769 return 0;
1770 }
1771 if (*t_p > s_t)
1772 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1773 arg.w.fn = qdisc_class_dump;
1774 arg.skb = skb;
1775 arg.cb = cb;
1776 arg.w.stop = 0;
1777 arg.w.skip = cb->args[1];
1778 arg.w.count = 0;
1779 q->ops->cl_ops->walk(q, &arg.w);
1780 cb->args[1] = arg.w.count;
1781 if (arg.w.stop)
1782 return -1;
1783 (*t_p)++;
1784 return 0;
1785}
1786
1787static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1788 struct tcmsg *tcm, struct netlink_callback *cb,
1789 int *t_p, int s_t)
1790{
1791 struct Qdisc *q;
59cc1f61 1792 int b;
30723673
DM
1793
1794 if (!root)
1795 return 0;
1796
1797 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1798 return -1;
1799
69012ae4
JK
1800 if (!qdisc_dev(root))
1801 return 0;
1802
59cc1f61 1803 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
30723673
DM
1804 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1805 return -1;
1806 }
1807
1808 return 0;
1809}
1810
1da177e4
LT
1811static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1812{
02ef22ca 1813 struct tcmsg *tcm = nlmsg_data(cb->nlh);
3b1e0a65 1814 struct net *net = sock_net(skb->sk);
30723673 1815 struct netdev_queue *dev_queue;
1da177e4 1816 struct net_device *dev;
30723673 1817 int t, s_t;
1da177e4 1818
573ce260 1819 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1da177e4 1820 return 0;
cc7ec456
ED
1821 dev = dev_get_by_index(net, tcm->tcm_ifindex);
1822 if (!dev)
1da177e4
LT
1823 return 0;
1824
1825 s_t = cb->args[0];
1826 t = 0;
1827
af356afa 1828 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
30723673
DM
1829 goto done;
1830
24824a09
ED
1831 dev_queue = dev_ingress_queue(dev);
1832 if (dev_queue &&
1833 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1834 &t, s_t) < 0)
30723673 1835 goto done;
1da177e4 1836
30723673 1837done:
1da177e4
LT
1838 cb->args[0] = t;
1839
1840 dev_put(dev);
1841 return skb->len;
1842}
1843
1844/* Main classifier routine: scans classifier chain attached
cc7ec456
ED
1845 * to this qdisc, (optionally) tests for protocol and asks
1846 * specific classifiers.
1da177e4 1847 */
3b3ae880
DB
1848int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
1849 struct tcf_result *res, bool compat_mode)
73ca4918 1850{
d8b9605d 1851 __be16 protocol = tc_skb_protocol(skb);
3b3ae880
DB
1852#ifdef CONFIG_NET_CLS_ACT
1853 const struct tcf_proto *old_tp = tp;
1854 int limit = 0;
73ca4918 1855
3b3ae880
DB
1856reclassify:
1857#endif
25d8c0d5 1858 for (; tp; tp = rcu_dereference_bh(tp->next)) {
3b3ae880
DB
1859 int err;
1860
cc7ec456
ED
1861 if (tp->protocol != protocol &&
1862 tp->protocol != htons(ETH_P_ALL))
1863 continue;
cc7ec456 1864
3b3ae880
DB
1865 err = tp->classify(skb, tp, res);
1866#ifdef CONFIG_NET_CLS_ACT
c1b3b199 1867 if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
3b3ae880
DB
1868 goto reset;
1869#endif
e578d9c0 1870 if (err >= 0)
73ca4918 1871 return err;
73ca4918 1872 }
73ca4918 1873
7e6e18fb 1874 return TC_ACT_UNSPEC; /* signal: continue lookup */
1da177e4 1875#ifdef CONFIG_NET_CLS_ACT
3b3ae880
DB
1876reset:
1877 if (unlikely(limit++ >= MAX_REC_LOOP)) {
c1b3b199
DB
1878 net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
1879 tp->q->ops->id, tp->prio & 0xffff,
1880 ntohs(tp->protocol));
3b3ae880 1881 return TC_ACT_SHOT;
1da177e4 1882 }
3b3ae880
DB
1883
1884 tp = old_tp;
619fe326 1885 protocol = tc_skb_protocol(skb);
3b3ae880 1886 goto reclassify;
73ca4918 1887#endif
1da177e4 1888}
73ca4918 1889EXPORT_SYMBOL(tc_classify);
1da177e4 1890
1e052be6 1891bool tcf_destroy(struct tcf_proto *tp, bool force)
a48b5a61 1892{
1e052be6
CW
1893 if (tp->ops->destroy(tp, force)) {
1894 module_put(tp->ops->owner);
1895 kfree_rcu(tp, rcu);
1896 return true;
1897 }
1898
1899 return false;
a48b5a61
PM
1900}
1901
25d8c0d5 1902void tcf_destroy_chain(struct tcf_proto __rcu **fl)
a48b5a61
PM
1903{
1904 struct tcf_proto *tp;
1905
25d8c0d5
JF
1906 while ((tp = rtnl_dereference(*fl)) != NULL) {
1907 RCU_INIT_POINTER(*fl, tp->next);
1e052be6 1908 tcf_destroy(tp, true);
a48b5a61
PM
1909 }
1910}
1911EXPORT_SYMBOL(tcf_destroy_chain);
1912
1da177e4
LT
1913#ifdef CONFIG_PROC_FS
1914static int psched_show(struct seq_file *seq, void *v)
1915{
1916 seq_printf(seq, "%08x %08x %08x %08x\n",
ca44d6e6 1917 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
514bca32 1918 1000000,
1e317688 1919 (u32)NSEC_PER_SEC / hrtimer_resolution);
1da177e4
LT
1920
1921 return 0;
1922}
1923
1924static int psched_open(struct inode *inode, struct file *file)
1925{
7e5ab157 1926 return single_open(file, psched_show, NULL);
1da177e4
LT
1927}
1928
da7071d7 1929static const struct file_operations psched_fops = {
1da177e4
LT
1930 .owner = THIS_MODULE,
1931 .open = psched_open,
1932 .read = seq_read,
1933 .llseek = seq_lseek,
1934 .release = single_release,
10297b99 1935};
7316ae88
TG
1936
1937static int __net_init psched_net_init(struct net *net)
1938{
1939 struct proc_dir_entry *e;
1940
d4beaa66 1941 e = proc_create("psched", 0, net->proc_net, &psched_fops);
7316ae88
TG
1942 if (e == NULL)
1943 return -ENOMEM;
1944
1945 return 0;
1946}
1947
1948static void __net_exit psched_net_exit(struct net *net)
1949{
ece31ffd 1950 remove_proc_entry("psched", net->proc_net);
7316ae88
TG
1951}
1952#else
1953static int __net_init psched_net_init(struct net *net)
1954{
1955 return 0;
1956}
1957
1958static void __net_exit psched_net_exit(struct net *net)
1959{
1960}
1da177e4
LT
1961#endif
1962
7316ae88
TG
1963static struct pernet_operations psched_net_ops = {
1964 .init = psched_net_init,
1965 .exit = psched_net_exit,
1966};
1967
1da177e4
LT
1968static int __init pktsched_init(void)
1969{
7316ae88
TG
1970 int err;
1971
1972 err = register_pernet_subsys(&psched_net_ops);
1973 if (err) {
cc7ec456 1974 pr_err("pktsched_init: "
7316ae88
TG
1975 "cannot initialize per netns operations\n");
1976 return err;
1977 }
1978
6da7c8fc 1979 register_qdisc(&pfifo_fast_ops);
1da177e4
LT
1980 register_qdisc(&pfifo_qdisc_ops);
1981 register_qdisc(&bfifo_qdisc_ops);
57dbb2d8 1982 register_qdisc(&pfifo_head_drop_qdisc_ops);
6ec1c69a 1983 register_qdisc(&mq_qdisc_ops);
d66d6c31 1984 register_qdisc(&noqueue_qdisc_ops);
1da177e4 1985
c7ac8679
GR
1986 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
1987 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
5a7a5555
JHS
1988 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
1989 NULL);
c7ac8679
GR
1990 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
1991 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
5a7a5555
JHS
1992 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
1993 NULL);
be577ddc 1994
1da177e4
LT
1995 return 0;
1996}
1997
1998subsys_initcall(pktsched_init);