]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/sched/sch_api.c
pkt_sched: Kill qdisc_lock_tree usage in cls_route.c
[mirror_ubuntu-artful-kernel.git] / net / sched / sch_api.c
CommitLineData
1da177e4
LT
1/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
1da177e4 21#include <linux/string.h>
1da177e4 22#include <linux/errno.h>
1da177e4 23#include <linux/skbuff.h>
1da177e4
LT
24#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
4179477f 29#include <linux/hrtimer.h>
1da177e4 30
457c4cbc 31#include <net/net_namespace.h>
b854272b 32#include <net/sock.h>
dc5fc579 33#include <net/netlink.h>
1da177e4
LT
34#include <net/pkt_sched.h>
35
1da177e4
LT
36static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
37 struct Qdisc *old, struct Qdisc *new);
38static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
39 struct Qdisc *q, unsigned long cl, int event);
40
41/*
42
43 Short review.
44 -------------
45
46 This file consists of two interrelated parts:
47
48 1. queueing disciplines manager frontend.
49 2. traffic classes manager frontend.
50
51 Generally, queueing discipline ("qdisc") is a black box,
52 which is able to enqueue packets and to dequeue them (when
53 device is ready to send something) in order and at times
54 determined by algorithm hidden in it.
55
 56   qdisc's are divided into two categories:
57 - "queues", which have no internal structure visible from outside.
58 - "schedulers", which split all the packets to "traffic classes",
59 using "packet classifiers" (look at cls_api.c)
60
61 In turn, classes may have child qdiscs (as rule, queues)
62 attached to them etc. etc. etc.
63
64 The goal of the routines in this file is to translate
65 information supplied by user in the form of handles
66 to more intelligible for kernel form, to make some sanity
67 checks and part of work, which is common to all qdiscs
68 and to provide rtnetlink notifications.
69
70 All real intelligent work is done inside qdisc modules.
71
72
73
74 Every discipline has two major routines: enqueue and dequeue.
75
76 ---dequeue
77
78 dequeue usually returns a skb to send. It is allowed to return NULL,
79 but it does not mean that queue is empty, it just means that
80 discipline does not want to send anything this time.
81 Queue is really empty if q->q.qlen == 0.
82 For complicated disciplines with multiple queues q->q is not
83 real packet queue, but however q->q.qlen must be valid.
84
85 ---enqueue
86
87 enqueue returns 0, if packet was enqueued successfully.
88 If packet (this one or another one) was dropped, it returns
89 not zero error code.
90 NET_XMIT_DROP - this packet dropped
91 Expected action: do not backoff, but wait until queue will clear.
92 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
93 Expected action: backoff or ignore
94 NET_XMIT_POLICED - dropped by police.
95 Expected action: backoff or error to real-time apps.
96
97 Auxiliary routines:
98
99 ---requeue
100
101 requeues once dequeued packet. It is used for non-standard or
e65d22e1 102 just buggy devices, which can defer output even if netif_queue_stopped()=0.
1da177e4
LT
103
104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
122/* Protects list of registered TC modules. It is pure SMP lock. */
123static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
131/* The list of all installed queueing disciplines. */
132
133static struct Qdisc_ops *qdisc_base;
134
 135/* Register/unregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
149 if (qops->requeue == NULL)
150 qops->requeue = noop_qdisc_ops.requeue;
151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
154 qops->next = NULL;
155 *qp = qops;
156 rc = 0;
157out:
158 write_unlock(&qdisc_mod_lock);
159 return rc;
160}
62e3ba1b 161EXPORT_SYMBOL(register_qdisc);
1da177e4
LT
162
163int unregister_qdisc(struct Qdisc_ops *qops)
164{
165 struct Qdisc_ops *q, **qp;
166 int err = -ENOENT;
167
168 write_lock(&qdisc_mod_lock);
169 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
170 if (q == qops)
171 break;
172 if (q) {
173 *qp = q->next;
174 q->next = NULL;
175 err = 0;
176 }
177 write_unlock(&qdisc_mod_lock);
178 return err;
179}
62e3ba1b 180EXPORT_SYMBOL(unregister_qdisc);
1da177e4
LT
181
182/* We know handle. Find qdisc among all qdisc's attached to device
183 (root qdisc, all its children, children of children etc.)
184 */
185
e8a0464c 186static struct Qdisc *__qdisc_lookup(struct netdev_queue *dev_queue, u32 handle)
1da177e4
LT
187{
188 struct Qdisc *q;
189
b0e1e646 190 list_for_each_entry(q, &dev_queue->qdisc_list, list) {
43effa1e 191 if (q->handle == handle)
1da177e4 192 return q;
1da177e4 193 }
1da177e4
LT
194 return NULL;
195}
196
e8a0464c
DM
197struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
198{
199 unsigned int i;
200
201 for (i = 0; i < dev->num_tx_queues; i++) {
202 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
203 struct Qdisc *q = __qdisc_lookup(txq, handle);
204 if (q)
205 return q;
206 }
207 return NULL;
208}
209
1da177e4
LT
210static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
211{
212 unsigned long cl;
213 struct Qdisc *leaf;
20fea08b 214 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
1da177e4
LT
215
216 if (cops == NULL)
217 return NULL;
218 cl = cops->get(p, classid);
219
220 if (cl == 0)
221 return NULL;
222 leaf = cops->leaf(p, cl);
223 cops->put(p, cl);
224 return leaf;
225}
226
227/* Find queueing discipline by name */
228
1e90474c 229static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
1da177e4
LT
230{
231 struct Qdisc_ops *q = NULL;
232
233 if (kind) {
234 read_lock(&qdisc_mod_lock);
235 for (q = qdisc_base; q; q = q->next) {
1e90474c 236 if (nla_strcmp(kind, q->id) == 0) {
1da177e4
LT
237 if (!try_module_get(q->owner))
238 q = NULL;
239 break;
240 }
241 }
242 read_unlock(&qdisc_mod_lock);
243 }
244 return q;
245}
246
247static struct qdisc_rate_table *qdisc_rtab_list;
248
1e90474c 249struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
1da177e4
LT
250{
251 struct qdisc_rate_table *rtab;
252
253 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
254 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
255 rtab->refcnt++;
256 return rtab;
257 }
258 }
259
5feb5e1a
PM
260 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
261 nla_len(tab) != TC_RTAB_SIZE)
1da177e4
LT
262 return NULL;
263
264 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
265 if (rtab) {
266 rtab->rate = *r;
267 rtab->refcnt = 1;
1e90474c 268 memcpy(rtab->data, nla_data(tab), 1024);
1da177e4
LT
269 rtab->next = qdisc_rtab_list;
270 qdisc_rtab_list = rtab;
271 }
272 return rtab;
273}
62e3ba1b 274EXPORT_SYMBOL(qdisc_get_rtab);
1da177e4
LT
275
276void qdisc_put_rtab(struct qdisc_rate_table *tab)
277{
278 struct qdisc_rate_table *rtab, **rtabp;
279
280 if (!tab || --tab->refcnt)
281 return;
282
283 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
284 if (rtab == tab) {
285 *rtabp = rtab->next;
286 kfree(rtab);
287 return;
288 }
289 }
290}
62e3ba1b 291EXPORT_SYMBOL(qdisc_put_rtab);
1da177e4 292
4179477f
PM
293static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
294{
295 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
296 timer);
297
298 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
11274e5a 299 smp_wmb();
37437bb2 300 __netif_schedule(wd->qdisc);
1936502d 301
4179477f
PM
302 return HRTIMER_NORESTART;
303}
304
305void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
306{
307 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
308 wd->timer.function = qdisc_watchdog;
309 wd->qdisc = qdisc;
310}
311EXPORT_SYMBOL(qdisc_watchdog_init);
312
313void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
314{
315 ktime_t time;
316
317 wd->qdisc->flags |= TCQ_F_THROTTLED;
318 time = ktime_set(0, 0);
319 time = ktime_add_ns(time, PSCHED_US2NS(expires));
320 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
321}
322EXPORT_SYMBOL(qdisc_watchdog_schedule);
323
324void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
325{
326 hrtimer_cancel(&wd->timer);
327 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
328}
329EXPORT_SYMBOL(qdisc_watchdog_cancel);
1da177e4 330
6fe1c7a5
PM
331struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
332{
333 unsigned int size = n * sizeof(struct hlist_head), i;
334 struct hlist_head *h;
335
336 if (size <= PAGE_SIZE)
337 h = kmalloc(size, GFP_KERNEL);
338 else
339 h = (struct hlist_head *)
340 __get_free_pages(GFP_KERNEL, get_order(size));
341
342 if (h != NULL) {
343 for (i = 0; i < n; i++)
344 INIT_HLIST_HEAD(&h[i]);
345 }
346 return h;
347}
348
349static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
350{
351 unsigned int size = n * sizeof(struct hlist_head);
352
353 if (size <= PAGE_SIZE)
354 kfree(h);
355 else
356 free_pages((unsigned long)h, get_order(size));
357}
358
359void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
360{
361 struct Qdisc_class_common *cl;
362 struct hlist_node *n, *next;
363 struct hlist_head *nhash, *ohash;
364 unsigned int nsize, nmask, osize;
365 unsigned int i, h;
366
367 /* Rehash when load factor exceeds 0.75 */
368 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
369 return;
370 nsize = clhash->hashsize * 2;
371 nmask = nsize - 1;
372 nhash = qdisc_class_hash_alloc(nsize);
373 if (nhash == NULL)
374 return;
375
376 ohash = clhash->hash;
377 osize = clhash->hashsize;
378
379 sch_tree_lock(sch);
380 for (i = 0; i < osize; i++) {
381 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
382 h = qdisc_class_hash(cl->classid, nmask);
383 hlist_add_head(&cl->hnode, &nhash[h]);
384 }
385 }
386 clhash->hash = nhash;
387 clhash->hashsize = nsize;
388 clhash->hashmask = nmask;
389 sch_tree_unlock(sch);
390
391 qdisc_class_hash_free(ohash, osize);
392}
393EXPORT_SYMBOL(qdisc_class_hash_grow);
394
395int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
396{
397 unsigned int size = 4;
398
399 clhash->hash = qdisc_class_hash_alloc(size);
400 if (clhash->hash == NULL)
401 return -ENOMEM;
402 clhash->hashsize = size;
403 clhash->hashmask = size - 1;
404 clhash->hashelems = 0;
405 return 0;
406}
407EXPORT_SYMBOL(qdisc_class_hash_init);
408
409void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
410{
411 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
412}
413EXPORT_SYMBOL(qdisc_class_hash_destroy);
414
415void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
416 struct Qdisc_class_common *cl)
417{
418 unsigned int h;
419
420 INIT_HLIST_NODE(&cl->hnode);
421 h = qdisc_class_hash(cl->classid, clhash->hashmask);
422 hlist_add_head(&cl->hnode, &clhash->hash[h]);
423 clhash->hashelems++;
424}
425EXPORT_SYMBOL(qdisc_class_hash_insert);
426
427void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
428 struct Qdisc_class_common *cl)
429{
430 hlist_del(&cl->hnode);
431 clhash->hashelems--;
432}
433EXPORT_SYMBOL(qdisc_class_hash_remove);
434
1da177e4
LT
435/* Allocate an unique handle from space managed by kernel */
436
437static u32 qdisc_alloc_handle(struct net_device *dev)
438{
439 int i = 0x10000;
440 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
441
442 do {
443 autohandle += TC_H_MAKE(0x10000U, 0);
444 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
445 autohandle = TC_H_MAKE(0x80000000U, 0);
446 } while (qdisc_lookup(dev, autohandle) && --i > 0);
447
448 return i>0 ? autohandle : 0;
449}
450
451/* Attach toplevel qdisc to device dev */
452
453static struct Qdisc *
454dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
455{
b0e1e646 456 struct netdev_queue *dev_queue;
1da177e4
LT
457 struct Qdisc *oqdisc;
458
459 if (dev->flags & IFF_UP)
460 dev_deactivate(dev);
461
462 qdisc_lock_tree(dev);
463 if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
816f3258
DM
464 dev_queue = &dev->rx_queue;
465 oqdisc = dev_queue->qdisc;
1da177e4
LT
466 /* Prune old scheduler */
467 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
468 /* delete */
469 qdisc_reset(oqdisc);
816f3258 470 dev_queue->qdisc = NULL;
1da177e4 471 } else { /* new */
816f3258 472 dev_queue->qdisc = qdisc;
1da177e4
LT
473 }
474
475 } else {
e8a0464c 476 dev_queue = netdev_get_tx_queue(dev, 0);
b0e1e646 477 oqdisc = dev_queue->qdisc_sleeping;
1da177e4
LT
478
479 /* Prune old scheduler */
480 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
481 qdisc_reset(oqdisc);
482
483 /* ... and graft new one */
484 if (qdisc == NULL)
485 qdisc = &noop_qdisc;
b0e1e646
DM
486 dev_queue->qdisc_sleeping = qdisc;
487 dev_queue->qdisc = &noop_qdisc;
1da177e4
LT
488 }
489
490 qdisc_unlock_tree(dev);
491
492 if (dev->flags & IFF_UP)
493 dev_activate(dev);
494
495 return oqdisc;
496}
497
43effa1e
PM
498void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
499{
20fea08b 500 const struct Qdisc_class_ops *cops;
43effa1e
PM
501 unsigned long cl;
502 u32 parentid;
503
504 if (n == 0)
505 return;
506 while ((parentid = sch->parent)) {
066a3b5b
JP
507 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
508 return;
509
5ce2d488 510 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
ffc8fefa
PM
511 if (sch == NULL) {
512 WARN_ON(parentid != TC_H_ROOT);
513 return;
514 }
43effa1e
PM
515 cops = sch->ops->cl_ops;
516 if (cops->qlen_notify) {
517 cl = cops->get(sch, parentid);
518 cops->qlen_notify(sch, cl);
519 cops->put(sch, cl);
520 }
521 sch->q.qlen -= n;
522 }
523}
524EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
1da177e4
LT
525
526/* Graft qdisc "new" to class "classid" of qdisc "parent" or
527 to device "dev".
528
529 Old qdisc is not destroyed but returned in *old.
530 */
531
532static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
533 u32 classid,
534 struct Qdisc *new, struct Qdisc **old)
535{
536 int err = 0;
537 struct Qdisc *q = *old;
538
539
10297b99 540 if (parent == NULL) {
1da177e4
LT
541 if (q && q->flags&TCQ_F_INGRESS) {
542 *old = dev_graft_qdisc(dev, q);
543 } else {
544 *old = dev_graft_qdisc(dev, new);
545 }
546 } else {
20fea08b 547 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1da177e4
LT
548
549 err = -EINVAL;
550
551 if (cops) {
552 unsigned long cl = cops->get(parent, classid);
553 if (cl) {
554 err = cops->graft(parent, cl, new, old);
1da177e4
LT
555 cops->put(parent, cl);
556 }
557 }
558 }
559 return err;
560}
561
562/*
563 Allocate and initialize new qdisc.
564
565 Parameters are passed via opt.
566 */
567
568static struct Qdisc *
bb949fbd
DM
569qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
570 u32 parent, u32 handle, struct nlattr **tca, int *errp)
1da177e4
LT
571{
572 int err;
1e90474c 573 struct nlattr *kind = tca[TCA_KIND];
1da177e4
LT
574 struct Qdisc *sch;
575 struct Qdisc_ops *ops;
1da177e4
LT
576
577 ops = qdisc_lookup_ops(kind);
578#ifdef CONFIG_KMOD
579 if (ops == NULL && kind != NULL) {
580 char name[IFNAMSIZ];
1e90474c 581 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1da177e4
LT
582 /* We dropped the RTNL semaphore in order to
583 * perform the module load. So, even if we
584 * succeeded in loading the module we have to
585 * tell the caller to replay the request. We
586 * indicate this using -EAGAIN.
587 * We replay the request because the device may
588 * go away in the mean time.
589 */
590 rtnl_unlock();
591 request_module("sch_%s", name);
592 rtnl_lock();
593 ops = qdisc_lookup_ops(kind);
594 if (ops != NULL) {
595 /* We will try again qdisc_lookup_ops,
596 * so don't keep a reference.
597 */
598 module_put(ops->owner);
599 err = -EAGAIN;
600 goto err_out;
601 }
602 }
603 }
604#endif
605
b9e2cc0f 606 err = -ENOENT;
1da177e4
LT
607 if (ops == NULL)
608 goto err_out;
609
5ce2d488 610 sch = qdisc_alloc(dev_queue, ops);
3d54b82f
TG
611 if (IS_ERR(sch)) {
612 err = PTR_ERR(sch);
1da177e4 613 goto err_out2;
3d54b82f 614 }
1da177e4 615
ffc8fefa
PM
616 sch->parent = parent;
617
3d54b82f 618 if (handle == TC_H_INGRESS) {
1da177e4 619 sch->flags |= TCQ_F_INGRESS;
3d54b82f 620 handle = TC_H_MAKE(TC_H_INGRESS, 0);
fd44de7c 621 } else {
fd44de7c
PM
622 if (handle == 0) {
623 handle = qdisc_alloc_handle(dev);
624 err = -ENOMEM;
625 if (handle == 0)
626 goto err_out3;
627 }
1da177e4
LT
628 }
629
3d54b82f 630 sch->handle = handle;
1da177e4 631
1e90474c
PM
632 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
633 if (tca[TCA_RATE]) {
023e09a7 634 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
7698b4fc 635 qdisc_root_lock(sch),
1e90474c 636 tca[TCA_RATE]);
023e09a7
TG
637 if (err) {
638 /*
639 * Any broken qdiscs that would require
640 * a ops->reset() here? The qdisc was never
641 * in action so it shouldn't be necessary.
642 */
643 if (ops->destroy)
644 ops->destroy(sch);
645 goto err_out3;
646 }
647 }
1da177e4 648 qdisc_lock_tree(dev);
b0e1e646 649 list_add_tail(&sch->list, &dev_queue->qdisc_list);
1da177e4
LT
650 qdisc_unlock_tree(dev);
651
1da177e4
LT
652 return sch;
653 }
654err_out3:
655 dev_put(dev);
3d54b82f 656 kfree((char *) sch - sch->padded);
1da177e4
LT
657err_out2:
658 module_put(ops->owner);
659err_out:
660 *errp = err;
1da177e4
LT
661 return NULL;
662}
663
1e90474c 664static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1da177e4 665{
1e90474c 666 if (tca[TCA_OPTIONS]) {
1da177e4
LT
667 int err;
668
669 if (sch->ops->change == NULL)
670 return -EINVAL;
1e90474c 671 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1da177e4
LT
672 if (err)
673 return err;
674 }
1e90474c 675 if (tca[TCA_RATE])
1da177e4 676 gen_replace_estimator(&sch->bstats, &sch->rate_est,
7698b4fc 677 qdisc_root_lock(sch), tca[TCA_RATE]);
1da177e4
LT
678 return 0;
679}
680
681struct check_loop_arg
682{
683 struct qdisc_walker w;
684 struct Qdisc *p;
685 int depth;
686};
687
688static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
689
690static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
691{
692 struct check_loop_arg arg;
693
694 if (q->ops->cl_ops == NULL)
695 return 0;
696
697 arg.w.stop = arg.w.skip = arg.w.count = 0;
698 arg.w.fn = check_loop_fn;
699 arg.depth = depth;
700 arg.p = p;
701 q->ops->cl_ops->walk(q, &arg.w);
702 return arg.w.stop ? -ELOOP : 0;
703}
704
705static int
706check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
707{
708 struct Qdisc *leaf;
20fea08b 709 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1da177e4
LT
710 struct check_loop_arg *arg = (struct check_loop_arg *)w;
711
712 leaf = cops->leaf(q, cl);
713 if (leaf) {
714 if (leaf == arg->p || arg->depth > 7)
715 return -ELOOP;
716 return check_loop(leaf, arg->p, arg->depth + 1);
717 }
718 return 0;
719}
720
721/*
722 * Delete/get qdisc.
723 */
724
725static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
726{
3b1e0a65 727 struct net *net = sock_net(skb->sk);
1da177e4 728 struct tcmsg *tcm = NLMSG_DATA(n);
1e90474c 729 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
730 struct net_device *dev;
731 u32 clid = tcm->tcm_parent;
732 struct Qdisc *q = NULL;
733 struct Qdisc *p = NULL;
734 int err;
735
b854272b
DL
736 if (net != &init_net)
737 return -EINVAL;
738
881d966b 739 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
740 return -ENODEV;
741
1e90474c
PM
742 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
743 if (err < 0)
744 return err;
745
1da177e4
LT
746 if (clid) {
747 if (clid != TC_H_ROOT) {
748 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
749 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
750 return -ENOENT;
751 q = qdisc_leaf(p, clid);
752 } else { /* ingress */
816f3258 753 q = dev->rx_queue.qdisc;
10297b99 754 }
1da177e4 755 } else {
e8a0464c
DM
756 struct netdev_queue *dev_queue;
757 dev_queue = netdev_get_tx_queue(dev, 0);
b0e1e646 758 q = dev_queue->qdisc_sleeping;
1da177e4
LT
759 }
760 if (!q)
761 return -ENOENT;
762
763 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
764 return -EINVAL;
765 } else {
766 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
767 return -ENOENT;
768 }
769
1e90474c 770 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
771 return -EINVAL;
772
773 if (n->nlmsg_type == RTM_DELQDISC) {
774 if (!clid)
775 return -EINVAL;
776 if (q->handle == 0)
777 return -ENOENT;
778 if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
779 return err;
780 if (q) {
781 qdisc_notify(skb, n, clid, q, NULL);
fd44de7c 782 qdisc_lock_tree(dev);
1da177e4 783 qdisc_destroy(q);
fd44de7c 784 qdisc_unlock_tree(dev);
1da177e4
LT
785 }
786 } else {
787 qdisc_notify(skb, n, clid, NULL, q);
788 }
789 return 0;
790}
791
792/*
793 Create/change qdisc.
794 */
795
796static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
797{
3b1e0a65 798 struct net *net = sock_net(skb->sk);
1da177e4 799 struct tcmsg *tcm;
1e90474c 800 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
801 struct net_device *dev;
802 u32 clid;
803 struct Qdisc *q, *p;
804 int err;
805
b854272b
DL
806 if (net != &init_net)
807 return -EINVAL;
808
1da177e4
LT
809replay:
810 /* Reinit, just in case something touches this. */
811 tcm = NLMSG_DATA(n);
1da177e4
LT
812 clid = tcm->tcm_parent;
813 q = p = NULL;
814
881d966b 815 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
816 return -ENODEV;
817
1e90474c
PM
818 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
819 if (err < 0)
820 return err;
821
1da177e4
LT
822 if (clid) {
823 if (clid != TC_H_ROOT) {
824 if (clid != TC_H_INGRESS) {
825 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
826 return -ENOENT;
827 q = qdisc_leaf(p, clid);
828 } else { /*ingress */
816f3258 829 q = dev->rx_queue.qdisc;
1da177e4
LT
830 }
831 } else {
e8a0464c
DM
832 struct netdev_queue *dev_queue;
833 dev_queue = netdev_get_tx_queue(dev, 0);
b0e1e646 834 q = dev_queue->qdisc_sleeping;
1da177e4
LT
835 }
836
837 /* It may be default qdisc, ignore it */
838 if (q && q->handle == 0)
839 q = NULL;
840
841 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
842 if (tcm->tcm_handle) {
843 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
844 return -EEXIST;
845 if (TC_H_MIN(tcm->tcm_handle))
846 return -EINVAL;
847 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
848 goto create_n_graft;
849 if (n->nlmsg_flags&NLM_F_EXCL)
850 return -EEXIST;
1e90474c 851 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
852 return -EINVAL;
853 if (q == p ||
854 (p && check_loop(q, p, 0)))
855 return -ELOOP;
856 atomic_inc(&q->refcnt);
857 goto graft;
858 } else {
859 if (q == NULL)
860 goto create_n_graft;
861
862 /* This magic test requires explanation.
863 *
864 * We know, that some child q is already
865 * attached to this parent and have choice:
866 * either to change it or to create/graft new one.
867 *
868 * 1. We are allowed to create/graft only
869 * if CREATE and REPLACE flags are set.
870 *
871 * 2. If EXCL is set, requestor wanted to say,
872 * that qdisc tcm_handle is not expected
873 * to exist, so that we choose create/graft too.
874 *
875 * 3. The last case is when no flags are set.
876 * Alas, it is sort of hole in API, we
877 * cannot decide what to do unambiguously.
878 * For now we select create/graft, if
879 * user gave KIND, which does not match existing.
880 */
881 if ((n->nlmsg_flags&NLM_F_CREATE) &&
882 (n->nlmsg_flags&NLM_F_REPLACE) &&
883 ((n->nlmsg_flags&NLM_F_EXCL) ||
1e90474c
PM
884 (tca[TCA_KIND] &&
885 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1da177e4
LT
886 goto create_n_graft;
887 }
888 }
889 } else {
890 if (!tcm->tcm_handle)
891 return -EINVAL;
892 q = qdisc_lookup(dev, tcm->tcm_handle);
893 }
894
895 /* Change qdisc parameters */
896 if (q == NULL)
897 return -ENOENT;
898 if (n->nlmsg_flags&NLM_F_EXCL)
899 return -EEXIST;
1e90474c 900 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
901 return -EINVAL;
902 err = qdisc_change(q, tca);
903 if (err == 0)
904 qdisc_notify(skb, n, clid, NULL, q);
905 return err;
906
907create_n_graft:
908 if (!(n->nlmsg_flags&NLM_F_CREATE))
909 return -ENOENT;
910 if (clid == TC_H_INGRESS)
bb949fbd
DM
911 q = qdisc_create(dev, &dev->rx_queue,
912 tcm->tcm_parent, tcm->tcm_parent,
ffc8fefa 913 tca, &err);
10297b99 914 else
e8a0464c 915 q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
bb949fbd 916 tcm->tcm_parent, tcm->tcm_handle,
ffc8fefa 917 tca, &err);
1da177e4
LT
918 if (q == NULL) {
919 if (err == -EAGAIN)
920 goto replay;
921 return err;
922 }
923
924graft:
925 if (1) {
926 struct Qdisc *old_q = NULL;
927 err = qdisc_graft(dev, p, clid, q, &old_q);
928 if (err) {
929 if (q) {
fd44de7c 930 qdisc_lock_tree(dev);
1da177e4 931 qdisc_destroy(q);
fd44de7c 932 qdisc_unlock_tree(dev);
1da177e4
LT
933 }
934 return err;
935 }
936 qdisc_notify(skb, n, clid, old_q, q);
937 if (old_q) {
fd44de7c 938 qdisc_lock_tree(dev);
1da177e4 939 qdisc_destroy(old_q);
fd44de7c 940 qdisc_unlock_tree(dev);
1da177e4
LT
941 }
942 }
943 return 0;
944}
945
946static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
e431b8c0 947 u32 pid, u32 seq, u16 flags, int event)
1da177e4
LT
948{
949 struct tcmsg *tcm;
950 struct nlmsghdr *nlh;
27a884dc 951 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
952 struct gnet_dump d;
953
e431b8c0 954 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1da177e4
LT
955 tcm = NLMSG_DATA(nlh);
956 tcm->tcm_family = AF_UNSPEC;
9ef1d4c7
PM
957 tcm->tcm__pad1 = 0;
958 tcm->tcm__pad2 = 0;
5ce2d488 959 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1da177e4
LT
960 tcm->tcm_parent = clid;
961 tcm->tcm_handle = q->handle;
962 tcm->tcm_info = atomic_read(&q->refcnt);
57e1c487 963 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
1da177e4 964 if (q->ops->dump && q->ops->dump(q, skb) < 0)
1e90474c 965 goto nla_put_failure;
1da177e4
LT
966 q->qstats.qlen = q->q.qlen;
967
968 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
7698b4fc 969 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
1e90474c 970 goto nla_put_failure;
1da177e4
LT
971
972 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
1e90474c 973 goto nla_put_failure;
1da177e4
LT
974
975 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
1da177e4 976 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
1da177e4 977 gnet_stats_copy_queue(&d, &q->qstats) < 0)
1e90474c 978 goto nla_put_failure;
10297b99 979
1da177e4 980 if (gnet_stats_finish_copy(&d) < 0)
1e90474c 981 goto nla_put_failure;
10297b99 982
27a884dc 983 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1da177e4
LT
984 return skb->len;
985
986nlmsg_failure:
1e90474c 987nla_put_failure:
dc5fc579 988 nlmsg_trim(skb, b);
1da177e4
LT
989 return -1;
990}
991
992static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
993 u32 clid, struct Qdisc *old, struct Qdisc *new)
994{
995 struct sk_buff *skb;
996 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
997
998 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
999 if (!skb)
1000 return -ENOBUFS;
1001
1002 if (old && old->handle) {
1003 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1004 goto err_out;
1005 }
1006 if (new) {
1007 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1008 goto err_out;
1009 }
1010
1011 if (skb->len)
97c53cac 1012 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1da177e4
LT
1013
1014err_out:
1015 kfree_skb(skb);
1016 return -EINVAL;
1017}
1018
1019static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1020{
3b1e0a65 1021 struct net *net = sock_net(skb->sk);
1da177e4
LT
1022 int idx, q_idx;
1023 int s_idx, s_q_idx;
1024 struct net_device *dev;
1025 struct Qdisc *q;
1026
b854272b
DL
1027 if (net != &init_net)
1028 return 0;
1029
1da177e4
LT
1030 s_idx = cb->args[0];
1031 s_q_idx = q_idx = cb->args[1];
1032 read_lock(&dev_base_lock);
7562f876 1033 idx = 0;
881d966b 1034 for_each_netdev(&init_net, dev) {
b0e1e646 1035 struct netdev_queue *dev_queue;
1da177e4 1036 if (idx < s_idx)
7562f876 1037 goto cont;
1da177e4
LT
1038 if (idx > s_idx)
1039 s_q_idx = 0;
1da177e4 1040 q_idx = 0;
e8a0464c 1041 dev_queue = netdev_get_tx_queue(dev, 0);
b0e1e646 1042 list_for_each_entry(q, &dev_queue->qdisc_list, list) {
1da177e4
LT
1043 if (q_idx < s_q_idx) {
1044 q_idx++;
1045 continue;
1046 }
1047 if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
0463d4ae 1048 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1da177e4 1049 goto done;
1da177e4
LT
1050 q_idx++;
1051 }
7562f876
PE
1052cont:
1053 idx++;
1da177e4
LT
1054 }
1055
1056done:
1057 read_unlock(&dev_base_lock);
1058
1059 cb->args[0] = idx;
1060 cb->args[1] = q_idx;
1061
1062 return skb->len;
1063}
1064
1065
1066
1067/************************************************
1068 * Traffic classes manipulation. *
1069 ************************************************/
1070
1071
1072
/* tc_ctl_tclass - RTM_{NEW,DEL,GET}TCLASS doit handler.
 *
 * Resolves the target qdisc and class from the tcmsg header, then
 * dispatches to the qdisc's class ops: create/change, delete, or dump a
 * single traffic class.  A netlink notification is sent on success.
 * Returns 0 or a negative errno.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	/* Packet scheduling is not namespace-aware yet: only the initial
	 * network namespace may configure traffic classes. */
	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;

		/* Now qid is genuine qdisc handle consistent
		   both with parent and child.

		   TC_H_MAJ(pid) still may be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Class not found: only RTM_NEWTCLASS with NLM_F_CREATE may
		 * proceed (to create it); everything else fails. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			/* Existing class + NLM_F_EXCL means "create only". */
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or update the class via the qdisc's change() hook. */
	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}
1194
1195
/* tc_fill_tclass - fill one RTM_*TCLASS netlink message for class @cl of @q.
 *
 * Emits the tcmsg header, the qdisc kind, the class-specific attributes
 * (via the qdisc's cl_ops->dump) and the statistics blocks.
 * Returns skb->len on success, or -1 if the skb ran out of room, in
 * which case the partially-built message is trimmed off again.
 * NOTE: NLMSG_NEW and NLA_PUT_STRING jump to the failure labels below on
 * overflow, so the labels are reachable despite having no explicit goto.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* Patch up the final message length now that all attributes are in. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1235
1236static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1237 struct Qdisc *q, unsigned long cl, int event)
1238{
1239 struct sk_buff *skb;
1240 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1241
1242 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1243 if (!skb)
1244 return -ENOBUFS;
1245
1246 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1247 kfree_skb(skb);
1248 return -EINVAL;
1249 }
1250
97c53cac 1251 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1da177e4
LT
1252}
1253
/* State threaded through a qdisc's class walker while dumping classes. */
struct qdisc_dump_args
{
	struct qdisc_walker w;		/* must be first: qdisc_class_dump() casts back */
	struct sk_buff *skb;		/* netlink dump reply under construction */
	struct netlink_callback *cb;	/* dump context (seq number, resume args) */
};
1260
1261static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1262{
1263 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1264
1265 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1266 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1267}
1268
/* tc_dump_tclass - RTM_GETTCLASS dump handler.
 *
 * Walks every qdisc on the device named in the request and dumps the
 * classes of each classful qdisc via qdisc_class_dump().  Resume state
 * across dump invocations lives in cb->args[0] (index of the qdisc being
 * dumped) and cb->args[1] (classes already emitted for that qdisc).
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	int t;
	int s_t;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
	struct qdisc_dump_args arg;

	/* Packet scheduling is not namespace-aware yet. */
	if (net != &init_net)
		return 0;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	dev_queue = netdev_get_tx_queue(dev, 0);
	list_for_each_entry(q, &dev_queue->qdisc_list, list) {
		/* Skip qdiscs already dumped in a previous pass, classless
		 * qdiscs, and those not matching a requested parent major. */
		if (t < s_t || !q->ops->cl_ops ||
		    (tcm->tcm_parent &&
		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
			t++;
			continue;
		}
		/* Entering a new qdisc: reset the per-qdisc resume state. */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
		arg.w.fn = qdisc_class_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1];	/* classes already emitted */
		arg.w.count = 0;
		q->ops->cl_ops->walk(q, &arg.w);
		cb->args[1] = arg.w.count;
		if (arg.w.stop)
			break;		/* skb full; resume here next time */
		t++;
	}

	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
1319
1320/* Main classifier routine: scans classifier chain attached
1321 to this qdisc, (optionally) tests for protocol and asks
1322 specific classifiers.
1323 */
73ca4918
PM
1324int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1325 struct tcf_result *res)
1326{
1327 __be16 protocol = skb->protocol;
1328 int err = 0;
1329
1330 for (; tp; tp = tp->next) {
1331 if ((tp->protocol == protocol ||
1332 tp->protocol == htons(ETH_P_ALL)) &&
1333 (err = tp->classify(skb, tp, res)) >= 0) {
1334#ifdef CONFIG_NET_CLS_ACT
1335 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1336 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1337#endif
1338 return err;
1339 }
1340 }
1341 return -1;
1342}
1343EXPORT_SYMBOL(tc_classify_compat);
1344
/* tc_classify - classify a packet, honouring TC_ACT_RECLASSIFY verdicts.
 *
 * Wraps tc_classify_compat(); when an action returns TC_ACT_RECLASSIFY,
 * classification restarts from the head of the filter chain.  The
 * restart count is kept in skb->tc_verd; after MAX_REC_LOOP restarts the
 * packet is dropped (TC_ACT_SHOT) to break reclassification loops.
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
	struct tcf_result *res)
{
	int err = 0;
	__be16 protocol;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;	/* chain head, for reclassify restarts */
reclassify:
#endif
	protocol = skb->protocol;

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		/* Remember how many times we have restarted. */
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
1da177e4 1375
/* tcf_destroy - free a single classifier instance.
 *
 * Invokes the classifier's destroy hook, releases the module reference
 * pinned by its ops owner, then frees the tcf_proto itself.
 */
void tcf_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree(tp);
}
1382
ff31ab56 1383void tcf_destroy_chain(struct tcf_proto **fl)
a48b5a61
PM
1384{
1385 struct tcf_proto *tp;
1386
ff31ab56
PM
1387 while ((tp = *fl) != NULL) {
1388 *fl = tp->next;
a48b5a61
PM
1389 tcf_destroy(tp);
1390 }
1391}
1392EXPORT_SYMBOL(tcf_destroy_chain);
1393
1da177e4
LT
#ifdef CONFIG_PROC_FS
/* /proc/net/psched contents: four hex words consumed by userspace tc to
 * derive the kernel's scheduler clock parameters — nanoseconds per
 * psched tick, psched ticks in nanoseconds, 1000000, and the hrtimer
 * resolution expressed in Hz.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;

	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));

	return 0;
}
1407
1408static int psched_open(struct inode *inode, struct file *file)
1409{
1410 return single_open(file, psched_show, PDE(inode)->data);
1411}
1412
/* File operations for /proc/net/psched (read-only single_open seq file). */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
#endif
1421
1da177e4
LT
/* pktsched_init - subsystem initialisation.
 *
 * Registers the built-in fifo qdiscs, creates /proc/net/psched, and
 * wires the qdisc/class rtnetlink message types to their handlers.
 * NOTE(review): return values of register_qdisc()/proc_net_fops_create()
 * are ignored here, matching the historical behaviour of this init path.
 */
static int __init pktsched_init(void)
{
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

	return 0;
}

subsys_initcall(pktsched_init);