]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/sched/sch_api.c
igb: do not allow phy sw reset code to make calls to null pointers
[mirror_ubuntu-artful-kernel.git] / net / sched / sch_api.c
CommitLineData
1da177e4
LT
1/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
1da177e4 21#include <linux/string.h>
1da177e4 22#include <linux/errno.h>
1da177e4 23#include <linux/skbuff.h>
1da177e4
LT
24#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
4179477f 29#include <linux/hrtimer.h>
25bfcd5a 30#include <linux/lockdep.h>
1da177e4 31
457c4cbc 32#include <net/net_namespace.h>
b854272b 33#include <net/sock.h>
dc5fc579 34#include <net/netlink.h>
1da177e4
LT
35#include <net/pkt_sched.h>
36
1da177e4
LT
37static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
38 struct Qdisc *old, struct Qdisc *new);
39static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
40 struct Qdisc *q, unsigned long cl, int event);
41
42/*
43
44 Short review.
45 -------------
46
47 This file consists of two interrelated parts:
48
49 1. queueing disciplines manager frontend.
50 2. traffic classes manager frontend.
51
52 Generally, queueing discipline ("qdisc") is a black box,
53 which is able to enqueue packets and to dequeue them (when
54 device is ready to send something) in order and at times
55 determined by algorithm hidden in it.
56
   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (look at cls_api.c)
61
62 In turn, classes may have child qdiscs (as rule, queues)
63 attached to them etc. etc. etc.
64
   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into a
   form more intelligible to the kernel, to perform some sanity
   checks and the part of the work that is common to all qdiscs,
   and to provide rtnetlink notifications.
70
71 All real intelligent work is done inside qdisc modules.
72
73
74
75 Every discipline has two major routines: enqueue and dequeue.
76
77 ---dequeue
78
   dequeue usually returns a skb to send. It is allowed to return NULL,
   but that does not mean that the queue is empty; it just means that
   the discipline does not want to send anything this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not
   a real packet queue, but q->q.qlen must nevertheless be valid.
85
86 ---enqueue
87
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- probably this packet was enqueued, but another one was dropped.
     Expected action: back off or ignore
   NET_XMIT_POLICED	- dropped by police.
     Expected action: back off or report an error to real-time apps.
97
98 Auxiliary routines:
99
99c0db26
JP
100 ---peek
101
102 like dequeue but without removing a packet from the queue
103
1da177e4
LT
104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
/*
 * Serializes access to the list of registered queueing disciplines
 * (qdisc_base).  It is a pure SMP lock.
 */
static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
/*
 * Head of the singly linked list (chained through ->next) of all
 * registered queueing disciplines.
 */
static struct Qdisc_ops *qdisc_base;
134
/* Register/unregister a queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
99c0db26
JP
149 if (qops->peek == NULL) {
150 if (qops->dequeue == NULL) {
151 qops->peek = noop_qdisc_ops.peek;
152 } else {
153 rc = -EINVAL;
154 goto out;
155 }
156 }
1da177e4
LT
157 if (qops->dequeue == NULL)
158 qops->dequeue = noop_qdisc_ops.dequeue;
159
160 qops->next = NULL;
161 *qp = qops;
162 rc = 0;
163out:
164 write_unlock(&qdisc_mod_lock);
165 return rc;
166}
62e3ba1b 167EXPORT_SYMBOL(register_qdisc);
1da177e4
LT
168
169int unregister_qdisc(struct Qdisc_ops *qops)
170{
171 struct Qdisc_ops *q, **qp;
172 int err = -ENOENT;
173
174 write_lock(&qdisc_mod_lock);
175 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
176 if (q == qops)
177 break;
178 if (q) {
179 *qp = q->next;
180 q->next = NULL;
181 err = 0;
182 }
183 write_unlock(&qdisc_mod_lock);
184 return err;
185}
62e3ba1b 186EXPORT_SYMBOL(unregister_qdisc);
1da177e4
LT
187
188/* We know handle. Find qdisc among all qdisc's attached to device
189 (root qdisc, all its children, children of children etc.)
190 */
191
6113b748 192static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
8123b421
DM
193{
194 struct Qdisc *q;
195
196 if (!(root->flags & TCQ_F_BUILTIN) &&
197 root->handle == handle)
198 return root;
199
200 list_for_each_entry(q, &root->list, list) {
201 if (q->handle == handle)
202 return q;
203 }
204 return NULL;
205}
206
f6e0b239
JP
207static void qdisc_list_add(struct Qdisc *q)
208{
f6486d40 209 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
af356afa 210 list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
f6e0b239
JP
211}
212
213void qdisc_list_del(struct Qdisc *q)
214{
f6486d40 215 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
f6e0b239 216 list_del(&q->list);
f6e0b239
JP
217}
218EXPORT_SYMBOL(qdisc_list_del);
219
ead81cc5 220struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
1da177e4 221{
f6e0b239
JP
222 struct Qdisc *q;
223
af356afa
PM
224 q = qdisc_match_from_root(dev->qdisc, handle);
225 if (q)
226 goto out;
f6e0b239
JP
227
228 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
f6486d40 229out:
f6e0b239 230 return q;
1da177e4
LT
231}
232
233static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
234{
235 unsigned long cl;
236 struct Qdisc *leaf;
20fea08b 237 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
1da177e4
LT
238
239 if (cops == NULL)
240 return NULL;
241 cl = cops->get(p, classid);
242
243 if (cl == 0)
244 return NULL;
245 leaf = cops->leaf(p, cl);
246 cops->put(p, cl);
247 return leaf;
248}
249
250/* Find queueing discipline by name */
251
1e90474c 252static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
1da177e4
LT
253{
254 struct Qdisc_ops *q = NULL;
255
256 if (kind) {
257 read_lock(&qdisc_mod_lock);
258 for (q = qdisc_base; q; q = q->next) {
1e90474c 259 if (nla_strcmp(kind, q->id) == 0) {
1da177e4
LT
260 if (!try_module_get(q->owner))
261 q = NULL;
262 break;
263 }
264 }
265 read_unlock(&qdisc_mod_lock);
266 }
267 return q;
268}
269
/* Head of the singly linked list of shared, reference-counted rate tables. */
static struct qdisc_rate_table *qdisc_rtab_list;
271
1e90474c 272struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
1da177e4
LT
273{
274 struct qdisc_rate_table *rtab;
275
276 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
277 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
278 rtab->refcnt++;
279 return rtab;
280 }
281 }
282
5feb5e1a
PM
283 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
284 nla_len(tab) != TC_RTAB_SIZE)
1da177e4
LT
285 return NULL;
286
287 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
288 if (rtab) {
289 rtab->rate = *r;
290 rtab->refcnt = 1;
1e90474c 291 memcpy(rtab->data, nla_data(tab), 1024);
1da177e4
LT
292 rtab->next = qdisc_rtab_list;
293 qdisc_rtab_list = rtab;
294 }
295 return rtab;
296}
62e3ba1b 297EXPORT_SYMBOL(qdisc_get_rtab);
1da177e4
LT
298
299void qdisc_put_rtab(struct qdisc_rate_table *tab)
300{
301 struct qdisc_rate_table *rtab, **rtabp;
302
303 if (!tab || --tab->refcnt)
304 return;
305
306 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
307 if (rtab == tab) {
308 *rtabp = rtab->next;
309 kfree(rtab);
310 return;
311 }
312 }
313}
62e3ba1b 314EXPORT_SYMBOL(qdisc_put_rtab);
1da177e4 315
175f9c1b
JK
316static LIST_HEAD(qdisc_stab_list);
317static DEFINE_SPINLOCK(qdisc_stab_lock);
318
319static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
320 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
321 [TCA_STAB_DATA] = { .type = NLA_BINARY },
322};
323
324static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
325{
326 struct nlattr *tb[TCA_STAB_MAX + 1];
327 struct qdisc_size_table *stab;
328 struct tc_sizespec *s;
329 unsigned int tsize = 0;
330 u16 *tab = NULL;
331 int err;
332
333 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
334 if (err < 0)
335 return ERR_PTR(err);
336 if (!tb[TCA_STAB_BASE])
337 return ERR_PTR(-EINVAL);
338
339 s = nla_data(tb[TCA_STAB_BASE]);
340
341 if (s->tsize > 0) {
342 if (!tb[TCA_STAB_DATA])
343 return ERR_PTR(-EINVAL);
344 tab = nla_data(tb[TCA_STAB_DATA]);
345 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
346 }
347
348 if (!s || tsize != s->tsize || (!tab && tsize > 0))
349 return ERR_PTR(-EINVAL);
350
f3b9605d 351 spin_lock(&qdisc_stab_lock);
175f9c1b
JK
352
353 list_for_each_entry(stab, &qdisc_stab_list, list) {
354 if (memcmp(&stab->szopts, s, sizeof(*s)))
355 continue;
356 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
357 continue;
358 stab->refcnt++;
f3b9605d 359 spin_unlock(&qdisc_stab_lock);
175f9c1b
JK
360 return stab;
361 }
362
f3b9605d 363 spin_unlock(&qdisc_stab_lock);
175f9c1b
JK
364
365 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
366 if (!stab)
367 return ERR_PTR(-ENOMEM);
368
369 stab->refcnt = 1;
370 stab->szopts = *s;
371 if (tsize > 0)
372 memcpy(stab->data, tab, tsize * sizeof(u16));
373
f3b9605d 374 spin_lock(&qdisc_stab_lock);
175f9c1b 375 list_add_tail(&stab->list, &qdisc_stab_list);
f3b9605d 376 spin_unlock(&qdisc_stab_lock);
175f9c1b
JK
377
378 return stab;
379}
380
381void qdisc_put_stab(struct qdisc_size_table *tab)
382{
383 if (!tab)
384 return;
385
f3b9605d 386 spin_lock(&qdisc_stab_lock);
175f9c1b
JK
387
388 if (--tab->refcnt == 0) {
389 list_del(&tab->list);
390 kfree(tab);
391 }
392
f3b9605d 393 spin_unlock(&qdisc_stab_lock);
175f9c1b
JK
394}
395EXPORT_SYMBOL(qdisc_put_stab);
396
397static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
398{
399 struct nlattr *nest;
400
401 nest = nla_nest_start(skb, TCA_STAB);
3aa4614d
PM
402 if (nest == NULL)
403 goto nla_put_failure;
175f9c1b
JK
404 NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
405 nla_nest_end(skb, nest);
406
407 return skb->len;
408
409nla_put_failure:
410 return -1;
411}
412
413void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
414{
415 int pkt_len, slot;
416
417 pkt_len = skb->len + stab->szopts.overhead;
418 if (unlikely(!stab->szopts.tsize))
419 goto out;
420
421 slot = pkt_len + stab->szopts.cell_align;
422 if (unlikely(slot < 0))
423 slot = 0;
424
425 slot >>= stab->szopts.cell_log;
426 if (likely(slot < stab->szopts.tsize))
427 pkt_len = stab->data[slot];
428 else
429 pkt_len = stab->data[stab->szopts.tsize - 1] *
430 (slot / stab->szopts.tsize) +
431 stab->data[slot % stab->szopts.tsize];
432
433 pkt_len <<= stab->szopts.size_log;
434out:
435 if (unlikely(pkt_len < 1))
436 pkt_len = 1;
437 qdisc_skb_cb(skb)->pkt_len = pkt_len;
438}
439EXPORT_SYMBOL(qdisc_calculate_pkt_len);
440
b00355db
JP
441void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
442{
443 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
444 printk(KERN_WARNING
445 "%s: %s qdisc %X: is non-work-conserving?\n",
446 txt, qdisc->ops->id, qdisc->handle >> 16);
447 qdisc->flags |= TCQ_F_WARN_NONWC;
448 }
449}
450EXPORT_SYMBOL(qdisc_warn_nonwc);
451
4179477f
PM
452static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
453{
454 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
2fbd3da3 455 timer);
4179477f
PM
456
457 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
8608db03 458 __netif_schedule(qdisc_root(wd->qdisc));
1936502d 459
4179477f
PM
460 return HRTIMER_NORESTART;
461}
462
463void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
464{
2fbd3da3
DM
465 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
466 wd->timer.function = qdisc_watchdog;
4179477f
PM
467 wd->qdisc = qdisc;
468}
469EXPORT_SYMBOL(qdisc_watchdog_init);
470
471void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
472{
473 ktime_t time;
474
2540e051
JP
475 if (test_bit(__QDISC_STATE_DEACTIVATED,
476 &qdisc_root_sleeping(wd->qdisc)->state))
477 return;
478
4179477f
PM
479 wd->qdisc->flags |= TCQ_F_THROTTLED;
480 time = ktime_set(0, 0);
ca44d6e6 481 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
2fbd3da3 482 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
4179477f
PM
483}
484EXPORT_SYMBOL(qdisc_watchdog_schedule);
485
486void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
487{
2fbd3da3 488 hrtimer_cancel(&wd->timer);
4179477f
PM
489 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
490}
491EXPORT_SYMBOL(qdisc_watchdog_cancel);
1da177e4 492
a94f779f 493static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
6fe1c7a5
PM
494{
495 unsigned int size = n * sizeof(struct hlist_head), i;
496 struct hlist_head *h;
497
498 if (size <= PAGE_SIZE)
499 h = kmalloc(size, GFP_KERNEL);
500 else
501 h = (struct hlist_head *)
502 __get_free_pages(GFP_KERNEL, get_order(size));
503
504 if (h != NULL) {
505 for (i = 0; i < n; i++)
506 INIT_HLIST_HEAD(&h[i]);
507 }
508 return h;
509}
510
511static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
512{
513 unsigned int size = n * sizeof(struct hlist_head);
514
515 if (size <= PAGE_SIZE)
516 kfree(h);
517 else
518 free_pages((unsigned long)h, get_order(size));
519}
520
521void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
522{
523 struct Qdisc_class_common *cl;
524 struct hlist_node *n, *next;
525 struct hlist_head *nhash, *ohash;
526 unsigned int nsize, nmask, osize;
527 unsigned int i, h;
528
529 /* Rehash when load factor exceeds 0.75 */
530 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
531 return;
532 nsize = clhash->hashsize * 2;
533 nmask = nsize - 1;
534 nhash = qdisc_class_hash_alloc(nsize);
535 if (nhash == NULL)
536 return;
537
538 ohash = clhash->hash;
539 osize = clhash->hashsize;
540
541 sch_tree_lock(sch);
542 for (i = 0; i < osize; i++) {
543 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
544 h = qdisc_class_hash(cl->classid, nmask);
545 hlist_add_head(&cl->hnode, &nhash[h]);
546 }
547 }
548 clhash->hash = nhash;
549 clhash->hashsize = nsize;
550 clhash->hashmask = nmask;
551 sch_tree_unlock(sch);
552
553 qdisc_class_hash_free(ohash, osize);
554}
555EXPORT_SYMBOL(qdisc_class_hash_grow);
556
557int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
558{
559 unsigned int size = 4;
560
561 clhash->hash = qdisc_class_hash_alloc(size);
562 if (clhash->hash == NULL)
563 return -ENOMEM;
564 clhash->hashsize = size;
565 clhash->hashmask = size - 1;
566 clhash->hashelems = 0;
567 return 0;
568}
569EXPORT_SYMBOL(qdisc_class_hash_init);
570
571void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
572{
573 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
574}
575EXPORT_SYMBOL(qdisc_class_hash_destroy);
576
577void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
578 struct Qdisc_class_common *cl)
579{
580 unsigned int h;
581
582 INIT_HLIST_NODE(&cl->hnode);
583 h = qdisc_class_hash(cl->classid, clhash->hashmask);
584 hlist_add_head(&cl->hnode, &clhash->hash[h]);
585 clhash->hashelems++;
586}
587EXPORT_SYMBOL(qdisc_class_hash_insert);
588
589void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
590 struct Qdisc_class_common *cl)
591{
592 hlist_del(&cl->hnode);
593 clhash->hashelems--;
594}
595EXPORT_SYMBOL(qdisc_class_hash_remove);
596
1da177e4
LT
/* Allocate a unique handle from the space managed by the kernel */
598
599static u32 qdisc_alloc_handle(struct net_device *dev)
600{
601 int i = 0x10000;
602 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
603
604 do {
605 autohandle += TC_H_MAKE(0x10000U, 0);
606 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
607 autohandle = TC_H_MAKE(0x80000000U, 0);
608 } while (qdisc_lookup(dev, autohandle) && --i > 0);
609
610 return i>0 ? autohandle : 0;
611}
612
43effa1e
PM
613void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
614{
20fea08b 615 const struct Qdisc_class_ops *cops;
43effa1e
PM
616 unsigned long cl;
617 u32 parentid;
618
619 if (n == 0)
620 return;
621 while ((parentid = sch->parent)) {
066a3b5b
JP
622 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
623 return;
624
5ce2d488 625 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
ffc8fefa
PM
626 if (sch == NULL) {
627 WARN_ON(parentid != TC_H_ROOT);
628 return;
629 }
43effa1e
PM
630 cops = sch->ops->cl_ops;
631 if (cops->qlen_notify) {
632 cl = cops->get(sch, parentid);
633 cops->qlen_notify(sch, cl);
634 cops->put(sch, cl);
635 }
636 sch->q.qlen -= n;
637 }
638}
639EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
1da177e4 640
99194cff
DM
641static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
642 struct Qdisc *old, struct Qdisc *new)
643{
644 if (new || old)
645 qdisc_notify(skb, n, clid, old, new);
1da177e4 646
4d8863a2 647 if (old)
99194cff 648 qdisc_destroy(old);
99194cff
DM
649}
650
651/* Graft qdisc "new" to class "classid" of qdisc "parent" or
652 * to device "dev".
653 *
654 * When appropriate send a netlink notification using 'skb'
655 * and "n".
656 *
657 * On success, destroy old qdisc.
1da177e4
LT
658 */
659
660static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
99194cff
DM
661 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
662 struct Qdisc *new, struct Qdisc *old)
1da177e4 663{
99194cff 664 struct Qdisc *q = old;
1da177e4 665 int err = 0;
1da177e4 666
10297b99 667 if (parent == NULL) {
99194cff
DM
668 unsigned int i, num_q, ingress;
669
670 ingress = 0;
671 num_q = dev->num_tx_queues;
8d50b53d
DM
672 if ((q && q->flags & TCQ_F_INGRESS) ||
673 (new && new->flags & TCQ_F_INGRESS)) {
99194cff
DM
674 num_q = 1;
675 ingress = 1;
676 }
677
678 if (dev->flags & IFF_UP)
679 dev_deactivate(dev);
680
6ec1c69a
DM
681 if (new && new->ops->attach) {
682 new->ops->attach(new);
683 num_q = 0;
684 }
685
99194cff
DM
686 for (i = 0; i < num_q; i++) {
687 struct netdev_queue *dev_queue = &dev->rx_queue;
688
689 if (!ingress)
690 dev_queue = netdev_get_tx_queue(dev, i);
691
8d50b53d
DM
692 old = dev_graft_qdisc(dev_queue, new);
693 if (new && i > 0)
694 atomic_inc(&new->refcnt);
695
af356afa 696 qdisc_destroy(old);
1da177e4 697 }
99194cff 698
af356afa 699 notify_and_destroy(skb, n, classid, dev->qdisc, new);
6ec1c69a 700 if (new && !new->ops->attach)
af356afa
PM
701 atomic_inc(&new->refcnt);
702 dev->qdisc = new ? : &noop_qdisc;
703
99194cff
DM
704 if (dev->flags & IFF_UP)
705 dev_activate(dev);
1da177e4 706 } else {
20fea08b 707 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1da177e4 708
c9f1d038
PM
709 err = -EOPNOTSUPP;
710 if (cops && cops->graft) {
1da177e4
LT
711 unsigned long cl = cops->get(parent, classid);
712 if (cl) {
99194cff 713 err = cops->graft(parent, cl, new, &old);
1da177e4 714 cops->put(parent, cl);
c9f1d038
PM
715 } else
716 err = -ENOENT;
1da177e4 717 }
99194cff
DM
718 if (!err)
719 notify_and_destroy(skb, n, classid, old, new);
1da177e4
LT
720 }
721 return err;
722}
723
25bfcd5a
JP
724/* lockdep annotation is needed for ingress; egress gets it only for name */
725static struct lock_class_key qdisc_tx_lock;
726static struct lock_class_key qdisc_rx_lock;
727
1da177e4
LT
728/*
729 Allocate and initialize new qdisc.
730
731 Parameters are passed via opt.
732 */
733
734static struct Qdisc *
bb949fbd 735qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
23bcf634
PM
736 struct Qdisc *p, u32 parent, u32 handle,
737 struct nlattr **tca, int *errp)
1da177e4
LT
738{
739 int err;
1e90474c 740 struct nlattr *kind = tca[TCA_KIND];
1da177e4
LT
741 struct Qdisc *sch;
742 struct Qdisc_ops *ops;
175f9c1b 743 struct qdisc_size_table *stab;
1da177e4
LT
744
745 ops = qdisc_lookup_ops(kind);
95a5afca 746#ifdef CONFIG_MODULES
1da177e4
LT
747 if (ops == NULL && kind != NULL) {
748 char name[IFNAMSIZ];
1e90474c 749 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1da177e4
LT
750 /* We dropped the RTNL semaphore in order to
751 * perform the module load. So, even if we
752 * succeeded in loading the module we have to
753 * tell the caller to replay the request. We
754 * indicate this using -EAGAIN.
755 * We replay the request because the device may
756 * go away in the mean time.
757 */
758 rtnl_unlock();
759 request_module("sch_%s", name);
760 rtnl_lock();
761 ops = qdisc_lookup_ops(kind);
762 if (ops != NULL) {
763 /* We will try again qdisc_lookup_ops,
764 * so don't keep a reference.
765 */
766 module_put(ops->owner);
767 err = -EAGAIN;
768 goto err_out;
769 }
770 }
771 }
772#endif
773
b9e2cc0f 774 err = -ENOENT;
1da177e4
LT
775 if (ops == NULL)
776 goto err_out;
777
5ce2d488 778 sch = qdisc_alloc(dev_queue, ops);
3d54b82f
TG
779 if (IS_ERR(sch)) {
780 err = PTR_ERR(sch);
1da177e4 781 goto err_out2;
3d54b82f 782 }
1da177e4 783
ffc8fefa
PM
784 sch->parent = parent;
785
3d54b82f 786 if (handle == TC_H_INGRESS) {
1da177e4 787 sch->flags |= TCQ_F_INGRESS;
3d54b82f 788 handle = TC_H_MAKE(TC_H_INGRESS, 0);
25bfcd5a 789 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
fd44de7c 790 } else {
fd44de7c
PM
791 if (handle == 0) {
792 handle = qdisc_alloc_handle(dev);
793 err = -ENOMEM;
794 if (handle == 0)
795 goto err_out3;
796 }
25bfcd5a 797 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
1da177e4
LT
798 }
799
3d54b82f 800 sch->handle = handle;
1da177e4 801
1e90474c 802 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
175f9c1b
JK
803 if (tca[TCA_STAB]) {
804 stab = qdisc_get_stab(tca[TCA_STAB]);
805 if (IS_ERR(stab)) {
806 err = PTR_ERR(stab);
807 goto err_out3;
808 }
809 sch->stab = stab;
810 }
1e90474c 811 if (tca[TCA_RATE]) {
f6f9b93f
JP
812 spinlock_t *root_lock;
813
23bcf634
PM
814 err = -EOPNOTSUPP;
815 if (sch->flags & TCQ_F_MQROOT)
816 goto err_out4;
817
f6f9b93f 818 if ((sch->parent != TC_H_ROOT) &&
23bcf634
PM
819 !(sch->flags & TCQ_F_INGRESS) &&
820 (!p || !(p->flags & TCQ_F_MQROOT)))
f6f9b93f
JP
821 root_lock = qdisc_root_sleeping_lock(sch);
822 else
823 root_lock = qdisc_lock(sch);
824
023e09a7 825 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
f6f9b93f 826 root_lock, tca[TCA_RATE]);
23bcf634
PM
827 if (err)
828 goto err_out4;
023e09a7 829 }
f6e0b239
JP
830
831 qdisc_list_add(sch);
1da177e4 832
1da177e4
LT
833 return sch;
834 }
835err_out3:
175f9c1b 836 qdisc_put_stab(sch->stab);
1da177e4 837 dev_put(dev);
3d54b82f 838 kfree((char *) sch - sch->padded);
1da177e4
LT
839err_out2:
840 module_put(ops->owner);
841err_out:
842 *errp = err;
1da177e4 843 return NULL;
23bcf634
PM
844
845err_out4:
846 /*
847 * Any broken qdiscs that would require a ops->reset() here?
848 * The qdisc was never in action so it shouldn't be necessary.
849 */
850 if (ops->destroy)
851 ops->destroy(sch);
852 goto err_out3;
1da177e4
LT
853}
854
1e90474c 855static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1da177e4 856{
175f9c1b
JK
857 struct qdisc_size_table *stab = NULL;
858 int err = 0;
1da177e4 859
175f9c1b 860 if (tca[TCA_OPTIONS]) {
1da177e4
LT
861 if (sch->ops->change == NULL)
862 return -EINVAL;
1e90474c 863 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1da177e4
LT
864 if (err)
865 return err;
866 }
175f9c1b
JK
867
868 if (tca[TCA_STAB]) {
869 stab = qdisc_get_stab(tca[TCA_STAB]);
870 if (IS_ERR(stab))
871 return PTR_ERR(stab);
872 }
873
874 qdisc_put_stab(sch->stab);
875 sch->stab = stab;
876
23bcf634 877 if (tca[TCA_RATE]) {
71bcb09a
SH
878 /* NB: ignores errors from replace_estimator
879 because change can't be undone. */
23bcf634
PM
880 if (sch->flags & TCQ_F_MQROOT)
881 goto out;
1da177e4 882 gen_replace_estimator(&sch->bstats, &sch->rate_est,
71bcb09a
SH
883 qdisc_root_sleeping_lock(sch),
884 tca[TCA_RATE]);
23bcf634
PM
885 }
886out:
1da177e4
LT
887 return 0;
888}
889
890struct check_loop_arg
891{
892 struct qdisc_walker w;
893 struct Qdisc *p;
894 int depth;
895};
896
897static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
898
899static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
900{
901 struct check_loop_arg arg;
902
903 if (q->ops->cl_ops == NULL)
904 return 0;
905
906 arg.w.stop = arg.w.skip = arg.w.count = 0;
907 arg.w.fn = check_loop_fn;
908 arg.depth = depth;
909 arg.p = p;
910 q->ops->cl_ops->walk(q, &arg.w);
911 return arg.w.stop ? -ELOOP : 0;
912}
913
914static int
915check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
916{
917 struct Qdisc *leaf;
20fea08b 918 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1da177e4
LT
919 struct check_loop_arg *arg = (struct check_loop_arg *)w;
920
921 leaf = cops->leaf(q, cl);
922 if (leaf) {
923 if (leaf == arg->p || arg->depth > 7)
924 return -ELOOP;
925 return check_loop(leaf, arg->p, arg->depth + 1);
926 }
927 return 0;
928}
929
930/*
931 * Delete/get qdisc.
932 */
933
934static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
935{
3b1e0a65 936 struct net *net = sock_net(skb->sk);
1da177e4 937 struct tcmsg *tcm = NLMSG_DATA(n);
1e90474c 938 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
939 struct net_device *dev;
940 u32 clid = tcm->tcm_parent;
941 struct Qdisc *q = NULL;
942 struct Qdisc *p = NULL;
943 int err;
944
b854272b
DL
945 if (net != &init_net)
946 return -EINVAL;
947
881d966b 948 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
949 return -ENODEV;
950
1e90474c
PM
951 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
952 if (err < 0)
953 return err;
954
1da177e4
LT
955 if (clid) {
956 if (clid != TC_H_ROOT) {
957 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
958 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
959 return -ENOENT;
960 q = qdisc_leaf(p, clid);
961 } else { /* ingress */
8123b421 962 q = dev->rx_queue.qdisc_sleeping;
10297b99 963 }
1da177e4 964 } else {
af356afa 965 q = dev->qdisc;
1da177e4
LT
966 }
967 if (!q)
968 return -ENOENT;
969
970 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
971 return -EINVAL;
972 } else {
973 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
974 return -ENOENT;
975 }
976
1e90474c 977 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
978 return -EINVAL;
979
980 if (n->nlmsg_type == RTM_DELQDISC) {
981 if (!clid)
982 return -EINVAL;
983 if (q->handle == 0)
984 return -ENOENT;
99194cff 985 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
1da177e4 986 return err;
1da177e4
LT
987 } else {
988 qdisc_notify(skb, n, clid, NULL, q);
989 }
990 return 0;
991}
992
993/*
994 Create/change qdisc.
995 */
996
997static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
998{
3b1e0a65 999 struct net *net = sock_net(skb->sk);
1da177e4 1000 struct tcmsg *tcm;
1e90474c 1001 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1002 struct net_device *dev;
1003 u32 clid;
1004 struct Qdisc *q, *p;
1005 int err;
1006
b854272b
DL
1007 if (net != &init_net)
1008 return -EINVAL;
1009
1da177e4
LT
1010replay:
1011 /* Reinit, just in case something touches this. */
1012 tcm = NLMSG_DATA(n);
1da177e4
LT
1013 clid = tcm->tcm_parent;
1014 q = p = NULL;
1015
881d966b 1016 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
1017 return -ENODEV;
1018
1e90474c
PM
1019 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1020 if (err < 0)
1021 return err;
1022
1da177e4
LT
1023 if (clid) {
1024 if (clid != TC_H_ROOT) {
1025 if (clid != TC_H_INGRESS) {
1026 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
1027 return -ENOENT;
1028 q = qdisc_leaf(p, clid);
1029 } else { /*ingress */
8123b421 1030 q = dev->rx_queue.qdisc_sleeping;
1da177e4
LT
1031 }
1032 } else {
af356afa 1033 q = dev->qdisc;
1da177e4
LT
1034 }
1035
1036 /* It may be default qdisc, ignore it */
1037 if (q && q->handle == 0)
1038 q = NULL;
1039
1040 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1041 if (tcm->tcm_handle) {
1042 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
1043 return -EEXIST;
1044 if (TC_H_MIN(tcm->tcm_handle))
1045 return -EINVAL;
1046 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
1047 goto create_n_graft;
1048 if (n->nlmsg_flags&NLM_F_EXCL)
1049 return -EEXIST;
1e90474c 1050 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
1051 return -EINVAL;
1052 if (q == p ||
1053 (p && check_loop(q, p, 0)))
1054 return -ELOOP;
1055 atomic_inc(&q->refcnt);
1056 goto graft;
1057 } else {
1058 if (q == NULL)
1059 goto create_n_graft;
1060
1061 /* This magic test requires explanation.
1062 *
1063 * We know, that some child q is already
1064 * attached to this parent and have choice:
1065 * either to change it or to create/graft new one.
1066 *
1067 * 1. We are allowed to create/graft only
1068 * if CREATE and REPLACE flags are set.
1069 *
1070 * 2. If EXCL is set, requestor wanted to say,
1071 * that qdisc tcm_handle is not expected
1072 * to exist, so that we choose create/graft too.
1073 *
1074 * 3. The last case is when no flags are set.
1075 * Alas, it is sort of hole in API, we
1076 * cannot decide what to do unambiguously.
1077 * For now we select create/graft, if
1078 * user gave KIND, which does not match existing.
1079 */
1080 if ((n->nlmsg_flags&NLM_F_CREATE) &&
1081 (n->nlmsg_flags&NLM_F_REPLACE) &&
1082 ((n->nlmsg_flags&NLM_F_EXCL) ||
1e90474c
PM
1083 (tca[TCA_KIND] &&
1084 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1da177e4
LT
1085 goto create_n_graft;
1086 }
1087 }
1088 } else {
1089 if (!tcm->tcm_handle)
1090 return -EINVAL;
1091 q = qdisc_lookup(dev, tcm->tcm_handle);
1092 }
1093
1094 /* Change qdisc parameters */
1095 if (q == NULL)
1096 return -ENOENT;
1097 if (n->nlmsg_flags&NLM_F_EXCL)
1098 return -EEXIST;
1e90474c 1099 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
1100 return -EINVAL;
1101 err = qdisc_change(q, tca);
1102 if (err == 0)
1103 qdisc_notify(skb, n, clid, NULL, q);
1104 return err;
1105
1106create_n_graft:
1107 if (!(n->nlmsg_flags&NLM_F_CREATE))
1108 return -ENOENT;
1109 if (clid == TC_H_INGRESS)
23bcf634 1110 q = qdisc_create(dev, &dev->rx_queue, p,
bb949fbd 1111 tcm->tcm_parent, tcm->tcm_parent,
ffc8fefa 1112 tca, &err);
6ec1c69a
DM
1113 else {
1114 unsigned int ntx = 0;
1115
1116 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1117 ntx = p->ops->cl_ops->select_queue(p, tcm);
1118
23bcf634 1119 q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), p,
bb949fbd 1120 tcm->tcm_parent, tcm->tcm_handle,
ffc8fefa 1121 tca, &err);
6ec1c69a 1122 }
1da177e4
LT
1123 if (q == NULL) {
1124 if (err == -EAGAIN)
1125 goto replay;
1126 return err;
1127 }
1128
1129graft:
e5befbd9
IJ
1130 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1131 if (err) {
1132 if (q)
1133 qdisc_destroy(q);
1134 return err;
1da177e4 1135 }
e5befbd9 1136
1da177e4
LT
1137 return 0;
1138}
1139
1140static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
e431b8c0 1141 u32 pid, u32 seq, u16 flags, int event)
1da177e4
LT
1142{
1143 struct tcmsg *tcm;
1144 struct nlmsghdr *nlh;
27a884dc 1145 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1146 struct gnet_dump d;
1147
e431b8c0 1148 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1da177e4
LT
1149 tcm = NLMSG_DATA(nlh);
1150 tcm->tcm_family = AF_UNSPEC;
9ef1d4c7
PM
1151 tcm->tcm__pad1 = 0;
1152 tcm->tcm__pad2 = 0;
5ce2d488 1153 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1da177e4
LT
1154 tcm->tcm_parent = clid;
1155 tcm->tcm_handle = q->handle;
1156 tcm->tcm_info = atomic_read(&q->refcnt);
57e1c487 1157 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
1da177e4 1158 if (q->ops->dump && q->ops->dump(q, skb) < 0)
1e90474c 1159 goto nla_put_failure;
1da177e4
LT
1160 q->qstats.qlen = q->q.qlen;
1161
175f9c1b
JK
1162 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
1163 goto nla_put_failure;
1164
102396ae
JP
1165 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1166 qdisc_root_sleeping_lock(q), &d) < 0)
1e90474c 1167 goto nla_put_failure;
1da177e4
LT
1168
1169 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
1e90474c 1170 goto nla_put_failure;
1da177e4
LT
1171
1172 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
1da177e4 1173 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
1da177e4 1174 gnet_stats_copy_queue(&d, &q->qstats) < 0)
1e90474c 1175 goto nla_put_failure;
10297b99 1176
1da177e4 1177 if (gnet_stats_finish_copy(&d) < 0)
1e90474c 1178 goto nla_put_failure;
10297b99 1179
27a884dc 1180 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1da177e4
LT
1181 return skb->len;
1182
1183nlmsg_failure:
1e90474c 1184nla_put_failure:
dc5fc579 1185 nlmsg_trim(skb, b);
1da177e4
LT
1186 return -1;
1187}
1188
1189static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1190 u32 clid, struct Qdisc *old, struct Qdisc *new)
1191{
1192 struct sk_buff *skb;
1193 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1194
1195 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1196 if (!skb)
1197 return -ENOBUFS;
1198
1199 if (old && old->handle) {
1200 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1201 goto err_out;
1202 }
1203 if (new) {
1204 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1205 goto err_out;
1206 }
1207
1208 if (skb->len)
97c53cac 1209 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1da177e4
LT
1210
1211err_out:
1212 kfree_skb(skb);
1213 return -EINVAL;
1214}
1215
30723673
DM
1216static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1217{
1218 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1219}
1220
1221static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1222 struct netlink_callback *cb,
1223 int *q_idx_p, int s_q_idx)
1224{
1225 int ret = 0, q_idx = *q_idx_p;
1226 struct Qdisc *q;
1227
1228 if (!root)
1229 return 0;
1230
1231 q = root;
1232 if (q_idx < s_q_idx) {
1233 q_idx++;
1234 } else {
1235 if (!tc_qdisc_dump_ignore(q) &&
1236 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1237 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1238 goto done;
1239 q_idx++;
1240 }
1241 list_for_each_entry(q, &root->list, list) {
1242 if (q_idx < s_q_idx) {
1243 q_idx++;
1244 continue;
1245 }
1246 if (!tc_qdisc_dump_ignore(q) &&
1247 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1248 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1249 goto done;
1250 q_idx++;
1251 }
1252
1253out:
1254 *q_idx_p = q_idx;
1255 return ret;
1256done:
1257 ret = -1;
1258 goto out;
1259}
1260
1da177e4
LT
1261static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1262{
3b1e0a65 1263 struct net *net = sock_net(skb->sk);
1da177e4
LT
1264 int idx, q_idx;
1265 int s_idx, s_q_idx;
1266 struct net_device *dev;
1da177e4 1267
b854272b
DL
1268 if (net != &init_net)
1269 return 0;
1270
1da177e4
LT
1271 s_idx = cb->args[0];
1272 s_q_idx = q_idx = cb->args[1];
1273 read_lock(&dev_base_lock);
7562f876 1274 idx = 0;
881d966b 1275 for_each_netdev(&init_net, dev) {
30723673
DM
1276 struct netdev_queue *dev_queue;
1277
1da177e4 1278 if (idx < s_idx)
7562f876 1279 goto cont;
1da177e4
LT
1280 if (idx > s_idx)
1281 s_q_idx = 0;
1da177e4 1282 q_idx = 0;
30723673 1283
af356afa 1284 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
30723673
DM
1285 goto done;
1286
1287 dev_queue = &dev->rx_queue;
827ebd64 1288 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
30723673
DM
1289 goto done;
1290
7562f876
PE
1291cont:
1292 idx++;
1da177e4
LT
1293 }
1294
1295done:
1296 read_unlock(&dev_base_lock);
1297
1298 cb->args[0] = idx;
1299 cb->args[1] = q_idx;
1300
1301 return skb->len;
1302}
1303
1304
1305
1306/************************************************
1307 * Traffic classes manipulation. *
1308 ************************************************/
1309
1310
1311
1312static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1313{
3b1e0a65 1314 struct net *net = sock_net(skb->sk);
1da177e4 1315 struct tcmsg *tcm = NLMSG_DATA(n);
1e90474c 1316 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1317 struct net_device *dev;
1318 struct Qdisc *q = NULL;
20fea08b 1319 const struct Qdisc_class_ops *cops;
1da177e4
LT
1320 unsigned long cl = 0;
1321 unsigned long new_cl;
1322 u32 pid = tcm->tcm_parent;
1323 u32 clid = tcm->tcm_handle;
1324 u32 qid = TC_H_MAJ(clid);
1325 int err;
1326
b854272b
DL
1327 if (net != &init_net)
1328 return -EINVAL;
1329
881d966b 1330 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
1331 return -ENODEV;
1332
1e90474c
PM
1333 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1334 if (err < 0)
1335 return err;
1336
1da177e4
LT
1337 /*
1338 parent == TC_H_UNSPEC - unspecified parent.
1339 parent == TC_H_ROOT - class is root, which has no parent.
1340 parent == X:0 - parent is root class.
1341 parent == X:Y - parent is a node in hierarchy.
1342 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1343
1344 handle == 0:0 - generate handle from kernel pool.
1345 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1346 handle == X:Y - clear.
1347 handle == X:0 - root class.
1348 */
1349
1350 /* Step 1. Determine qdisc handle X:0 */
1351
1352 if (pid != TC_H_ROOT) {
1353 u32 qid1 = TC_H_MAJ(pid);
1354
1355 if (qid && qid1) {
1356 /* If both majors are known, they must be identical. */
1357 if (qid != qid1)
1358 return -EINVAL;
1359 } else if (qid1) {
1360 qid = qid1;
1361 } else if (qid == 0)
af356afa 1362 qid = dev->qdisc->handle;
1da177e4
LT
1363
1364 /* Now qid is genuine qdisc handle consistent
1365 both with parent and child.
1366
1367 TC_H_MAJ(pid) still may be unspecified, complete it now.
1368 */
1369 if (pid)
1370 pid = TC_H_MAKE(qid, pid);
1371 } else {
1372 if (qid == 0)
af356afa 1373 qid = dev->qdisc->handle;
1da177e4
LT
1374 }
1375
1376 /* OK. Locate qdisc */
10297b99 1377 if ((q = qdisc_lookup(dev, qid)) == NULL)
1da177e4
LT
1378 return -ENOENT;
1379
1380 /* An check that it supports classes */
1381 cops = q->ops->cl_ops;
1382 if (cops == NULL)
1383 return -EINVAL;
1384
1385 /* Now try to get class */
1386 if (clid == 0) {
1387 if (pid == TC_H_ROOT)
1388 clid = qid;
1389 } else
1390 clid = TC_H_MAKE(qid, clid);
1391
1392 if (clid)
1393 cl = cops->get(q, clid);
1394
1395 if (cl == 0) {
1396 err = -ENOENT;
1397 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1398 goto out;
1399 } else {
1400 switch (n->nlmsg_type) {
10297b99 1401 case RTM_NEWTCLASS:
1da177e4
LT
1402 err = -EEXIST;
1403 if (n->nlmsg_flags&NLM_F_EXCL)
1404 goto out;
1405 break;
1406 case RTM_DELTCLASS:
de6d5cdf
PM
1407 err = -EOPNOTSUPP;
1408 if (cops->delete)
1409 err = cops->delete(q, cl);
1da177e4
LT
1410 if (err == 0)
1411 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1412 goto out;
1413 case RTM_GETTCLASS:
1414 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1415 goto out;
1416 default:
1417 err = -EINVAL;
1418 goto out;
1419 }
1420 }
1421
1422 new_cl = cl;
de6d5cdf
PM
1423 err = -EOPNOTSUPP;
1424 if (cops->change)
1425 err = cops->change(q, clid, pid, tca, &new_cl);
1da177e4
LT
1426 if (err == 0)
1427 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1428
1429out:
1430 if (cl)
1431 cops->put(q, cl);
1432
1433 return err;
1434}
1435
1436
1437static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1438 unsigned long cl,
e431b8c0 1439 u32 pid, u32 seq, u16 flags, int event)
1da177e4
LT
1440{
1441 struct tcmsg *tcm;
1442 struct nlmsghdr *nlh;
27a884dc 1443 unsigned char *b = skb_tail_pointer(skb);
1da177e4 1444 struct gnet_dump d;
20fea08b 1445 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1da177e4 1446
e431b8c0 1447 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1da177e4
LT
1448 tcm = NLMSG_DATA(nlh);
1449 tcm->tcm_family = AF_UNSPEC;
16ebb5e0
ED
1450 tcm->tcm__pad1 = 0;
1451 tcm->tcm__pad2 = 0;
5ce2d488 1452 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1da177e4
LT
1453 tcm->tcm_parent = q->handle;
1454 tcm->tcm_handle = q->handle;
1455 tcm->tcm_info = 0;
57e1c487 1456 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
1da177e4 1457 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1e90474c 1458 goto nla_put_failure;
1da177e4 1459
102396ae
JP
1460 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1461 qdisc_root_sleeping_lock(q), &d) < 0)
1e90474c 1462 goto nla_put_failure;
1da177e4
LT
1463
1464 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1e90474c 1465 goto nla_put_failure;
1da177e4
LT
1466
1467 if (gnet_stats_finish_copy(&d) < 0)
1e90474c 1468 goto nla_put_failure;
1da177e4 1469
27a884dc 1470 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1da177e4
LT
1471 return skb->len;
1472
1473nlmsg_failure:
1e90474c 1474nla_put_failure:
dc5fc579 1475 nlmsg_trim(skb, b);
1da177e4
LT
1476 return -1;
1477}
1478
1479static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1480 struct Qdisc *q, unsigned long cl, int event)
1481{
1482 struct sk_buff *skb;
1483 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1484
1485 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1486 if (!skb)
1487 return -ENOBUFS;
1488
1489 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1490 kfree_skb(skb);
1491 return -EINVAL;
1492 }
1493
97c53cac 1494 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1da177e4
LT
1495}
1496
1497struct qdisc_dump_args
1498{
1499 struct qdisc_walker w;
1500 struct sk_buff *skb;
1501 struct netlink_callback *cb;
1502};
1503
1504static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1505{
1506 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1507
1508 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1509 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1510}
1511
30723673
DM
1512static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1513 struct tcmsg *tcm, struct netlink_callback *cb,
1514 int *t_p, int s_t)
1515{
1516 struct qdisc_dump_args arg;
1517
1518 if (tc_qdisc_dump_ignore(q) ||
1519 *t_p < s_t || !q->ops->cl_ops ||
1520 (tcm->tcm_parent &&
1521 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1522 (*t_p)++;
1523 return 0;
1524 }
1525 if (*t_p > s_t)
1526 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1527 arg.w.fn = qdisc_class_dump;
1528 arg.skb = skb;
1529 arg.cb = cb;
1530 arg.w.stop = 0;
1531 arg.w.skip = cb->args[1];
1532 arg.w.count = 0;
1533 q->ops->cl_ops->walk(q, &arg.w);
1534 cb->args[1] = arg.w.count;
1535 if (arg.w.stop)
1536 return -1;
1537 (*t_p)++;
1538 return 0;
1539}
1540
1541static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1542 struct tcmsg *tcm, struct netlink_callback *cb,
1543 int *t_p, int s_t)
1544{
1545 struct Qdisc *q;
1546
1547 if (!root)
1548 return 0;
1549
1550 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1551 return -1;
1552
1553 list_for_each_entry(q, &root->list, list) {
1554 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1555 return -1;
1556 }
1557
1558 return 0;
1559}
1560
1da177e4
LT
1561static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1562{
30723673 1563 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
3b1e0a65 1564 struct net *net = sock_net(skb->sk);
30723673 1565 struct netdev_queue *dev_queue;
1da177e4 1566 struct net_device *dev;
30723673 1567 int t, s_t;
1da177e4 1568
b854272b
DL
1569 if (net != &init_net)
1570 return 0;
1571
1da177e4
LT
1572 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1573 return 0;
881d966b 1574 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
1575 return 0;
1576
1577 s_t = cb->args[0];
1578 t = 0;
1579
af356afa 1580 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
30723673
DM
1581 goto done;
1582
1583 dev_queue = &dev->rx_queue;
8123b421 1584 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
30723673 1585 goto done;
1da177e4 1586
30723673 1587done:
1da177e4
LT
1588 cb->args[0] = t;
1589
1590 dev_put(dev);
1591 return skb->len;
1592}
1593
1594/* Main classifier routine: scans classifier chain attached
1595 to this qdisc, (optionally) tests for protocol and asks
1596 specific classifiers.
1597 */
73ca4918
PM
1598int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1599 struct tcf_result *res)
1600{
1601 __be16 protocol = skb->protocol;
1602 int err = 0;
1603
1604 for (; tp; tp = tp->next) {
1605 if ((tp->protocol == protocol ||
1606 tp->protocol == htons(ETH_P_ALL)) &&
1607 (err = tp->classify(skb, tp, res)) >= 0) {
1608#ifdef CONFIG_NET_CLS_ACT
1609 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1610 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1611#endif
1612 return err;
1613 }
1614 }
1615 return -1;
1616}
1617EXPORT_SYMBOL(tc_classify_compat);
1618
/*
 * Classify @skb against the chain starting at @tp.
 *
 * Thin wrapper around tc_classify_compat().  With CONFIG_NET_CLS_ACT, a
 * TC_ACT_RECLASSIFY result restarts classification from the head of the
 * chain; the number of restarts is tracked in skb->tc_verd and once it
 * reaches MAX_REC_LOOP the packet is given the TC_ACT_SHOT verdict.
 *
 * Returns the result of tc_classify_compat(), or TC_ACT_SHOT on a
 * reclassify loop.
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;	/* chain head, for restarts */
reclassify:
#endif

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			printk(KERN_NOTICE
			       "rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio & 0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
1da177e4 1649
a48b5a61
PM
1650void tcf_destroy(struct tcf_proto *tp)
1651{
1652 tp->ops->destroy(tp);
1653 module_put(tp->ops->owner);
1654 kfree(tp);
1655}
1656
ff31ab56 1657void tcf_destroy_chain(struct tcf_proto **fl)
a48b5a61
PM
1658{
1659 struct tcf_proto *tp;
1660
ff31ab56
PM
1661 while ((tp = *fl) != NULL) {
1662 *fl = tp->next;
a48b5a61
PM
1663 tcf_destroy(tp);
1664 }
1665}
1666EXPORT_SYMBOL(tcf_destroy_chain);
1667
1da177e4
LT
1668#ifdef CONFIG_PROC_FS
1669static int psched_show(struct seq_file *seq, void *v)
1670{
3c0cfc13
PM
1671 struct timespec ts;
1672
1673 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
1da177e4 1674 seq_printf(seq, "%08x %08x %08x %08x\n",
ca44d6e6 1675 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
514bca32 1676 1000000,
3c0cfc13 1677 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
1da177e4
LT
1678
1679 return 0;
1680}
1681
1682static int psched_open(struct inode *inode, struct file *file)
1683{
1684 return single_open(file, psched_show, PDE(inode)->data);
1685}
1686
da7071d7 1687static const struct file_operations psched_fops = {
1da177e4
LT
1688 .owner = THIS_MODULE,
1689 .open = psched_open,
1690 .read = seq_read,
1691 .llseek = seq_lseek,
1692 .release = single_release,
10297b99 1693};
1da177e4
LT
1694#endif
1695
1da177e4
LT
1696static int __init pktsched_init(void)
1697{
1da177e4
LT
1698 register_qdisc(&pfifo_qdisc_ops);
1699 register_qdisc(&bfifo_qdisc_ops);
6ec1c69a 1700 register_qdisc(&mq_qdisc_ops);
457c4cbc 1701 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
1da177e4 1702
be577ddc
TG
1703 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1704 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1705 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1706 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1707 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1708 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1709
1da177e4
LT
1710 return 0;
1711}
1712
1713subsys_initcall(pktsched_init);