]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/sched/sch_api.c
net_sched: Add accessor function for packet length for qdiscs
[mirror_ubuntu-artful-kernel.git] / net / sched / sch_api.c
CommitLineData
1da177e4
LT
1/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
1da177e4 21#include <linux/string.h>
1da177e4 22#include <linux/errno.h>
1da177e4 23#include <linux/skbuff.h>
1da177e4
LT
24#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
4179477f 29#include <linux/hrtimer.h>
1da177e4 30
457c4cbc 31#include <net/net_namespace.h>
b854272b 32#include <net/sock.h>
dc5fc579 33#include <net/netlink.h>
1da177e4
LT
34#include <net/pkt_sched.h>
35
1da177e4
LT
36static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
37 struct Qdisc *old, struct Qdisc *new);
38static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
39 struct Qdisc *q, unsigned long cl, int event);
40
41/*
42
43 Short review.
44 -------------
45
46 This file consists of two interrelated parts:
47
48 1. queueing disciplines manager frontend.
49 2. traffic classes manager frontend.
50
51 Generally, queueing discipline ("qdisc") is a black box,
52 which is able to enqueue packets and to dequeue them (when
53 device is ready to send something) in order and at times
54 determined by algorithm hidden in it.
55
   qdisc's are divided into two categories:
57 - "queues", which have no internal structure visible from outside.
58 - "schedulers", which split all the packets to "traffic classes",
59 using "packet classifiers" (look at cls_api.c)
60
61 In turn, classes may have child qdiscs (as rule, queues)
62 attached to them etc. etc. etc.
63
64 The goal of the routines in this file is to translate
65 information supplied by user in the form of handles
66 to more intelligible for kernel form, to make some sanity
67 checks and part of work, which is common to all qdiscs
68 and to provide rtnetlink notifications.
69
70 All real intelligent work is done inside qdisc modules.
71
72
73
74 Every discipline has two major routines: enqueue and dequeue.
75
76 ---dequeue
77
78 dequeue usually returns a skb to send. It is allowed to return NULL,
79 but it does not mean that queue is empty, it just means that
80 discipline does not want to send anything this time.
81 Queue is really empty if q->q.qlen == 0.
82 For complicated disciplines with multiple queues q->q is not
83 real packet queue, but however q->q.qlen must be valid.
84
85 ---enqueue
86
87 enqueue returns 0, if packet was enqueued successfully.
88 If packet (this one or another one) was dropped, it returns
89 not zero error code.
90 NET_XMIT_DROP - this packet dropped
91 Expected action: do not backoff, but wait until queue will clear.
92 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
93 Expected action: backoff or ignore
94 NET_XMIT_POLICED - dropped by police.
95 Expected action: backoff or error to real-time apps.
96
97 Auxiliary routines:
98
99 ---requeue
100
101 requeues once dequeued packet. It is used for non-standard or
e65d22e1 102 just buggy devices, which can defer output even if netif_queue_stopped()=0.
1da177e4
LT
103
104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
122/* Protects list of registered TC modules. It is pure SMP lock. */
123static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
131/* The list of all installed queueing disciplines. */
132
133static struct Qdisc_ops *qdisc_base;
134
/* Register/unregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
149 if (qops->requeue == NULL)
150 qops->requeue = noop_qdisc_ops.requeue;
151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
154 qops->next = NULL;
155 *qp = qops;
156 rc = 0;
157out:
158 write_unlock(&qdisc_mod_lock);
159 return rc;
160}
62e3ba1b 161EXPORT_SYMBOL(register_qdisc);
1da177e4
LT
162
163int unregister_qdisc(struct Qdisc_ops *qops)
164{
165 struct Qdisc_ops *q, **qp;
166 int err = -ENOENT;
167
168 write_lock(&qdisc_mod_lock);
169 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
170 if (q == qops)
171 break;
172 if (q) {
173 *qp = q->next;
174 q->next = NULL;
175 err = 0;
176 }
177 write_unlock(&qdisc_mod_lock);
178 return err;
179}
62e3ba1b 180EXPORT_SYMBOL(unregister_qdisc);
1da177e4
LT
181
182/* We know handle. Find qdisc among all qdisc's attached to device
183 (root qdisc, all its children, children of children etc.)
184 */
185
ead81cc5 186struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
1da177e4 187{
30723673
DM
188 unsigned int i;
189
190 for (i = 0; i < dev->num_tx_queues; i++) {
191 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
192 struct Qdisc *q, *txq_root = txq->qdisc;
1da177e4 193
30723673
DM
194 if (!(txq_root->flags & TCQ_F_BUILTIN) &&
195 txq_root->handle == handle)
196 return txq_root;
197
198 list_for_each_entry(q, &txq_root->list, list) {
199 if (q->handle == handle)
200 return q;
201 }
1da177e4 202 }
1da177e4
LT
203 return NULL;
204}
205
206static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
207{
208 unsigned long cl;
209 struct Qdisc *leaf;
20fea08b 210 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
1da177e4
LT
211
212 if (cops == NULL)
213 return NULL;
214 cl = cops->get(p, classid);
215
216 if (cl == 0)
217 return NULL;
218 leaf = cops->leaf(p, cl);
219 cops->put(p, cl);
220 return leaf;
221}
222
223/* Find queueing discipline by name */
224
1e90474c 225static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
1da177e4
LT
226{
227 struct Qdisc_ops *q = NULL;
228
229 if (kind) {
230 read_lock(&qdisc_mod_lock);
231 for (q = qdisc_base; q; q = q->next) {
1e90474c 232 if (nla_strcmp(kind, q->id) == 0) {
1da177e4
LT
233 if (!try_module_get(q->owner))
234 q = NULL;
235 break;
236 }
237 }
238 read_unlock(&qdisc_mod_lock);
239 }
240 return q;
241}
242
243static struct qdisc_rate_table *qdisc_rtab_list;
244
1e90474c 245struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
1da177e4
LT
246{
247 struct qdisc_rate_table *rtab;
248
249 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
250 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
251 rtab->refcnt++;
252 return rtab;
253 }
254 }
255
5feb5e1a
PM
256 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
257 nla_len(tab) != TC_RTAB_SIZE)
1da177e4
LT
258 return NULL;
259
260 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
261 if (rtab) {
262 rtab->rate = *r;
263 rtab->refcnt = 1;
1e90474c 264 memcpy(rtab->data, nla_data(tab), 1024);
1da177e4
LT
265 rtab->next = qdisc_rtab_list;
266 qdisc_rtab_list = rtab;
267 }
268 return rtab;
269}
62e3ba1b 270EXPORT_SYMBOL(qdisc_get_rtab);
1da177e4
LT
271
272void qdisc_put_rtab(struct qdisc_rate_table *tab)
273{
274 struct qdisc_rate_table *rtab, **rtabp;
275
276 if (!tab || --tab->refcnt)
277 return;
278
279 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
280 if (rtab == tab) {
281 *rtabp = rtab->next;
282 kfree(rtab);
283 return;
284 }
285 }
286}
62e3ba1b 287EXPORT_SYMBOL(qdisc_put_rtab);
1da177e4 288
4179477f
PM
/* hrtimer callback for the qdisc watchdog: clear the throttle flag and
 * reschedule the qdisc so it gets another chance to dequeue. */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
	/* Order the flag clear before rescheduling the qdisc; the barrier
	 * must stay between the two statements. */
	smp_wmb();
	__netif_schedule(wd->qdisc);

	/* One-shot timer: never rearm from the callback. */
	return HRTIMER_NORESTART;
}
300
301void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
302{
303 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
304 wd->timer.function = qdisc_watchdog;
305 wd->qdisc = qdisc;
306}
307EXPORT_SYMBOL(qdisc_watchdog_init);
308
309void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
310{
311 ktime_t time;
312
313 wd->qdisc->flags |= TCQ_F_THROTTLED;
314 time = ktime_set(0, 0);
315 time = ktime_add_ns(time, PSCHED_US2NS(expires));
316 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
317}
318EXPORT_SYMBOL(qdisc_watchdog_schedule);
319
/* Cancel a pending watchdog timer and clear the throttle flag. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
1da177e4 326
6fe1c7a5
PM
327struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
328{
329 unsigned int size = n * sizeof(struct hlist_head), i;
330 struct hlist_head *h;
331
332 if (size <= PAGE_SIZE)
333 h = kmalloc(size, GFP_KERNEL);
334 else
335 h = (struct hlist_head *)
336 __get_free_pages(GFP_KERNEL, get_order(size));
337
338 if (h != NULL) {
339 for (i = 0; i < n; i++)
340 INIT_HLIST_HEAD(&h[i]);
341 }
342 return h;
343}
344
345static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
346{
347 unsigned int size = n * sizeof(struct hlist_head);
348
349 if (size <= PAGE_SIZE)
350 kfree(h);
351 else
352 free_pages((unsigned long)h, get_order(size));
353}
354
355void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
356{
357 struct Qdisc_class_common *cl;
358 struct hlist_node *n, *next;
359 struct hlist_head *nhash, *ohash;
360 unsigned int nsize, nmask, osize;
361 unsigned int i, h;
362
363 /* Rehash when load factor exceeds 0.75 */
364 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
365 return;
366 nsize = clhash->hashsize * 2;
367 nmask = nsize - 1;
368 nhash = qdisc_class_hash_alloc(nsize);
369 if (nhash == NULL)
370 return;
371
372 ohash = clhash->hash;
373 osize = clhash->hashsize;
374
375 sch_tree_lock(sch);
376 for (i = 0; i < osize; i++) {
377 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
378 h = qdisc_class_hash(cl->classid, nmask);
379 hlist_add_head(&cl->hnode, &nhash[h]);
380 }
381 }
382 clhash->hash = nhash;
383 clhash->hashsize = nsize;
384 clhash->hashmask = nmask;
385 sch_tree_unlock(sch);
386
387 qdisc_class_hash_free(ohash, osize);
388}
389EXPORT_SYMBOL(qdisc_class_hash_grow);
390
391int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
392{
393 unsigned int size = 4;
394
395 clhash->hash = qdisc_class_hash_alloc(size);
396 if (clhash->hash == NULL)
397 return -ENOMEM;
398 clhash->hashsize = size;
399 clhash->hashmask = size - 1;
400 clhash->hashelems = 0;
401 return 0;
402}
403EXPORT_SYMBOL(qdisc_class_hash_init);
404
/* Free the bucket array of a class hash. Does not touch the classes
 * themselves; callers must have emptied the hash already. */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
410
411void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
412 struct Qdisc_class_common *cl)
413{
414 unsigned int h;
415
416 INIT_HLIST_NODE(&cl->hnode);
417 h = qdisc_class_hash(cl->classid, clhash->hashmask);
418 hlist_add_head(&cl->hnode, &clhash->hash[h]);
419 clhash->hashelems++;
420}
421EXPORT_SYMBOL(qdisc_class_hash_insert);
422
/* Remove class @cl from its hash chain and drop the element count.
 * The caller is expected to hold the appropriate qdisc lock. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
430
1da177e4
LT
431/* Allocate an unique handle from space managed by kernel */
432
433static u32 qdisc_alloc_handle(struct net_device *dev)
434{
435 int i = 0x10000;
436 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
437
438 do {
439 autohandle += TC_H_MAKE(0x10000U, 0);
440 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
441 autohandle = TC_H_MAKE(0x80000000U, 0);
442 } while (qdisc_lookup(dev, autohandle) && --i > 0);
443
444 return i>0 ? autohandle : 0;
445}
446
/* Attach toplevel qdisc to device queue. */

/* Swap the qdisc attached to @dev_queue for @qdisc and return the old
 * one. Ingress qdiscs live in dev_queue->qdisc; egress qdiscs live in
 * dev_queue->qdisc_sleeping (the active slot is parked on noop_qdisc
 * until dev_activate() installs the sleeping one). The swap is done
 * under the old qdisc's root lock. */
static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
				     struct Qdisc *qdisc)
{
	spinlock_t *root_lock;
	struct Qdisc *oqdisc;
	int ingress;

	ingress = 0;
	if (qdisc && qdisc->flags&TCQ_F_INGRESS)
		ingress = 1;

	if (ingress) {
		oqdisc = dev_queue->qdisc;
	} else {
		oqdisc = dev_queue->qdisc_sleeping;
	}

	/* NOTE(review): the lock is taken via the OLD qdisc; presumably
	 * old and new share the same root lock for this queue — confirm
	 * against qdisc_root_lock(). */
	root_lock = qdisc_root_lock(oqdisc);
	spin_lock_bh(root_lock);

	if (ingress) {
		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
			/* delete */
			qdisc_reset(oqdisc);
			dev_queue->qdisc = NULL;
		} else {  /* new */
			dev_queue->qdisc = qdisc;
		}

	} else {
		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
			qdisc_reset(oqdisc);

		/* ... and graft new one */
		if (qdisc == NULL)
			qdisc = &noop_qdisc;
		dev_queue->qdisc_sleeping = qdisc;
		dev_queue->qdisc = &noop_qdisc;
	}

	spin_unlock_bh(root_lock);

	return oqdisc;
}
495
43effa1e
PM
/* Propagate a queue-length decrease of @n packets up the qdisc tree:
 * every ancestor's qlen is reduced and, if its class ops provide
 * qlen_notify, the owning class is notified (so e.g. it can be
 * deactivated when it becomes empty). Stops at the root or at an
 * ingress parent. */
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;

	if (n == 0)
		return;
	while ((parentid = sch->parent)) {
		/* Ingress qdiscs have no meaningful upward chain. */
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			/* Only the root may legitimately have no qdisc. */
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		/* NOTE(review): cops is dereferenced unconditionally —
		 * assumes any qdisc reached via a parent id is classful;
		 * confirm this invariant holds for all callers. */
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
1da177e4 523
99194cff
DM
524static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
525 struct Qdisc *old, struct Qdisc *new)
526{
527 if (new || old)
528 qdisc_notify(skb, n, clid, old, new);
1da177e4 529
99194cff
DM
530 if (old) {
531 spin_lock_bh(&old->q.lock);
532 qdisc_destroy(old);
533 spin_unlock_bh(&old->q.lock);
534 }
535}
536
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 *
 * With parent == NULL the qdisc is grafted as a root: on egress it is
 * installed on every tx queue (taking one extra reference per queue
 * past the first); on ingress only the single rx queue is touched.
 * The device is quiesced around the swap. With a parent, the graft is
 * delegated to the parent's class ops.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if (q && q->flags & TCQ_F_INGRESS) {
			/* Ingress: exactly one (rx) queue to operate on. */
			num_q = 1;
			ingress = 1;
		}

		/* Stop traffic while roots are being swapped. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = &dev->rx_queue;

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			if (ingress) {
				old = dev_graft_qdisc(dev_queue, q);
			} else {
				old = dev_graft_qdisc(dev_queue, new);
				/* One reference per extra tx queue sharing
				 * the same root qdisc. */
				if (new && i > 0)
					atomic_inc(&new->refcnt);
			}
			notify_and_destroy(skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				/* The class ops fill in *old on success. */
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			}
		}
		if (!err)
			notify_and_destroy(skb, n, classid, old, new);
	}
	return err;
}
601
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.

   On success returns the new qdisc; on failure returns NULL and stores
   the error in *errp. -EAGAIN means a scheduler module was loaded and
   the caller must replay the whole request (RTNL was dropped).
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     u32 parent, u32 handle, struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (tca[TCA_RATE]) {
			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						qdisc_root_lock(sch),
						tca[TCA_RATE]);
			if (err) {
				/*
				 * Any broken qdiscs that would require
				 * a ops->reset() here? The qdisc was never
				 * in action so it shouldn't be necessary.
				 */
				if (ops->destroy)
					ops->destroy(sch);
				goto err_out3;
			}
		}
		/* Non-root qdiscs are linked into the root's child list
		 * so qdisc_lookup() can find them. */
		if (parent)
			list_add_tail(&sch->list, &dev_queue->qdisc->list);

		return sch;
	}
err_out3:
	/* NOTE(review): this dev_put() presumably pairs with a device
	 * reference taken inside qdisc_alloc() — confirm against that
	 * function before touching the unwind order. */
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}
702
1e90474c 703static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1da177e4 704{
1e90474c 705 if (tca[TCA_OPTIONS]) {
1da177e4
LT
706 int err;
707
708 if (sch->ops->change == NULL)
709 return -EINVAL;
1e90474c 710 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1da177e4
LT
711 if (err)
712 return err;
713 }
1e90474c 714 if (tca[TCA_RATE])
1da177e4 715 gen_replace_estimator(&sch->bstats, &sch->rate_est,
7698b4fc 716 qdisc_root_lock(sch), tca[TCA_RATE]);
1da177e4
LT
717 return 0;
718}
719
/* Walker state for graft loop detection: p is the qdisc about to be
 * grafted, depth bounds the recursion while walking the hierarchy. */
struct check_loop_arg
{
	struct qdisc_walker w;
	struct Qdisc *p;
	int depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
728
729static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
730{
731 struct check_loop_arg arg;
732
733 if (q->ops->cl_ops == NULL)
734 return 0;
735
736 arg.w.stop = arg.w.skip = arg.w.count = 0;
737 arg.w.fn = check_loop_fn;
738 arg.depth = depth;
739 arg.p = p;
740 q->ops->cl_ops->walk(q, &arg.w);
741 return arg.w.stop ? -ELOOP : 0;
742}
743
744static int
745check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
746{
747 struct Qdisc *leaf;
20fea08b 748 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1da177e4
LT
749 struct check_loop_arg *arg = (struct check_loop_arg *)w;
750
751 leaf = cops->leaf(q, cl);
752 if (leaf) {
753 if (leaf == arg->p || arg->depth > 7)
754 return -ELOOP;
755 return check_loop(leaf, arg->p, arg->depth + 1);
756 }
757 return 0;
758}
759
/*
 * Delete/get qdisc.
 *
 * Handles RTM_DELQDISC and RTM_GETQDISC: locates the qdisc named by
 * tcm_parent/tcm_handle and either ungrafts it (delete) or sends a
 * notification describing it (get).
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Traffic control is init-netns only at this point. */
	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				/* Resolve parent, then the child under it. */
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		/* If a handle was also given, it must agree. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		/* Deleting requires an explicit parent and a real
		 * (non-default, handle != 0) qdisc. */
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
			return err;
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}
824
/*
 * Create/change qdisc.
 *
 * Handles RTM_NEWQDISC: depending on the netlink flags and whether a
 * matching qdisc already exists, this either changes the existing
 * qdisc in place, grafts an existing one to a new position, or
 * creates a brand new one and grafts it. A module autoload inside
 * qdisc_create() reports -EAGAIN, which restarts the whole request
 * at the replay label.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	/* Traffic control is init-netns only at this point. */
	if (net != &init_net)
		return -EINVAL;

replay:
	/* Reinit, just in case something touches this. */
	tcm = NLMSG_DATA(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				/* Resolve parent qdisc, then its leaf. */
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /*ingress */
				q = dev->rx_queue.qdisc;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags&NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Re-grafting an existing qdisc must not
				 * create a cycle in the hierarchy. */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags&NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, &dev->rx_queue,
				 tcm->tcm_parent, tcm->tcm_parent,
				 tca, &err);
	else
		q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	if (q == NULL) {
		/* -EAGAIN: a scheduler module was loaded; replay. */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	if (1) {
		spinlock_t *root_lock;

		err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
		if (err) {
			/* Grafting failed: tear down the qdisc we just
			 * created (or re-referenced) under its lock. */
			if (q) {
				root_lock = qdisc_root_lock(q);
				spin_lock_bh(root_lock);
				qdisc_destroy(q);
				spin_unlock_bh(root_lock);
			}
			return err;
		}
	}
	return 0;
}
974
/* Fill one qdisc description (tcmsg + TCA_* attributes + statistics)
 * into @skb for netlink event @event. Returns skb->len on success or
 * -1 when the skb ran out of room (the partial message is trimmed).
 * Note: NLMSG_NEW/NLA_PUT_STRING jump to the failure labels on
 * overflow. */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	/* tcm_info carries the refcount in qdisc dumps. */
	tcm->tcm_info = atomic_read(&q->refcnt);
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	/* Refresh the qlen snapshot before copying the stats out. */
	q->qstats.qlen = q->q.qlen;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* Patch the final message length now that all attributes are in. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	/* Roll the skb back to where this message started. */
	nlmsg_trim(skb, b);
	return -1;
}
1020
1021static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1022 u32 clid, struct Qdisc *old, struct Qdisc *new)
1023{
1024 struct sk_buff *skb;
1025 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1026
1027 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1028 if (!skb)
1029 return -ENOBUFS;
1030
1031 if (old && old->handle) {
1032 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1033 goto err_out;
1034 }
1035 if (new) {
1036 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1037 goto err_out;
1038 }
1039
1040 if (skb->len)
97c53cac 1041 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1da177e4
LT
1042
1043err_out:
1044 kfree_skb(skb);
1045 return -EINVAL;
1046}
1047
30723673
DM
1048static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1049{
1050 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1051}
1052
1053static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1054 struct netlink_callback *cb,
1055 int *q_idx_p, int s_q_idx)
1056{
1057 int ret = 0, q_idx = *q_idx_p;
1058 struct Qdisc *q;
1059
1060 if (!root)
1061 return 0;
1062
1063 q = root;
1064 if (q_idx < s_q_idx) {
1065 q_idx++;
1066 } else {
1067 if (!tc_qdisc_dump_ignore(q) &&
1068 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1069 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1070 goto done;
1071 q_idx++;
1072 }
1073 list_for_each_entry(q, &root->list, list) {
1074 if (q_idx < s_q_idx) {
1075 q_idx++;
1076 continue;
1077 }
1078 if (!tc_qdisc_dump_ignore(q) &&
1079 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1080 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1081 goto done;
1082 q_idx++;
1083 }
1084
1085out:
1086 *q_idx_p = q_idx;
1087 return ret;
1088done:
1089 ret = -1;
1090 goto out;
1091}
1092
1da177e4
LT
/* Netlink dump handler for RTM_GETQDISC: walk every device in the
 * init namespace and report its egress root (+children) and ingress
 * qdiscs. Resume state across dump calls lives in cb->args[0]
 * (device index) and cb->args[1] (qdisc index within the device). */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	/* Traffic control is init-netns only at this point. */
	if (net != &init_net)
		return 0;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	idx = 0;
	for_each_netdev(&init_net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Only the device we stopped at resumes mid-list. */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		dev_queue = netdev_get_tx_queue(dev, 0);
		if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = &dev->rx_queue;
		if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	read_unlock(&dev_base_lock);

	/* Save resume state for the next dump invocation. */
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1136
1137
1138
1139/************************************************
1140 * Traffic classes manipulation. *
1141 ************************************************/
1142
1143
1144
/* RTM_NEWTCLASS / RTM_DELTCLASS / RTM_GETTCLASS handler: create, change,
 * delete or query a traffic class of a classful qdisc.
 * Returns 0 on success or a negative errno.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	/* Packet scheduling is not network-namespace aware yet. */
	if (net != &init_net)
		return -EINVAL;

	/* Caller (rtnetlink) holds the RTNL, so the plain lookup is safe. */
	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT - class is root, which has no parent.
	   parent == X:0 - parent is root class.
	   parent == X:Y - parent is a node in hierarchy.
	   parent == 0:Y - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0 - generate handle from kernel pool.
	   handle == 0:Y - class is X:Y, where X:0 is qdisc.
	   handle == X:Y - clear.
	   handle == X:0 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;

		/* Now qid is genuine qdisc handle consistent
		   both with parent and child.

		   TC_H_MAJ(pid) still may be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	/* cops->get() takes a reference that must be dropped at "out". */
	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Unknown class: only RTM_NEWTCLASS with NLM_F_CREATE may
		 * proceed (to create it); everything else is ENOENT.
		 */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			/* Existing class + NLM_F_EXCL means "create only". */
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or change: delegate to the qdisc's class ops. */
	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	/* Drop the reference taken by cops->get() above. */
	if (cl)
		cops->put(q, cl);

	return err;
}
1266
1267
/* Append one class description (tcmsg header + attributes + statistics)
 * to a netlink skb.  Returns skb->len on success, or -1 when the
 * message does not fit, in which case the partial message is trimmed
 * so a multi-part dump can resume cleanly.
 * NOTE: NLMSG_NEW and NLA_PUT_STRING are macros that jump to the
 * nlmsg_failure / nla_put_failure labels below on overflow.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	/* Let the qdisc's class ops fill class-specific attributes. */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* Patch the final message length now that all attributes are in. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	/* Undo the partial message. */
	nlmsg_trim(skb, b);
	return -1;
}
1307
1308static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1309 struct Qdisc *q, unsigned long cl, int event)
1310{
1311 struct sk_buff *skb;
1312 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1313
1314 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1315 if (!skb)
1316 return -ENOBUFS;
1317
1318 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1319 kfree_skb(skb);
1320 return -EINVAL;
1321 }
1322
97c53cac 1323 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1da177e4
LT
1324}
1325
/* State threaded through the generic class walker while dumping
 * classes to netlink.  The embedded walker must be the first member so
 * the callback can cast the qdisc_walker pointer back to this struct.
 */
struct qdisc_dump_args
{
	struct qdisc_walker w;		/* generic walk state (must be first) */
	struct sk_buff *skb;		/* dump skb being filled */
	struct netlink_callback *cb;	/* netlink dump context (seq, args) */
};
1332
1333static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1334{
1335 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1336
1337 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1338 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1339}
1340
30723673
DM
1341static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1342 struct tcmsg *tcm, struct netlink_callback *cb,
1343 int *t_p, int s_t)
1344{
1345 struct qdisc_dump_args arg;
1346
1347 if (tc_qdisc_dump_ignore(q) ||
1348 *t_p < s_t || !q->ops->cl_ops ||
1349 (tcm->tcm_parent &&
1350 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1351 (*t_p)++;
1352 return 0;
1353 }
1354 if (*t_p > s_t)
1355 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1356 arg.w.fn = qdisc_class_dump;
1357 arg.skb = skb;
1358 arg.cb = cb;
1359 arg.w.stop = 0;
1360 arg.w.skip = cb->args[1];
1361 arg.w.count = 0;
1362 q->ops->cl_ops->walk(q, &arg.w);
1363 cb->args[1] = arg.w.count;
1364 if (arg.w.stop)
1365 return -1;
1366 (*t_p)++;
1367 return 0;
1368}
1369
1370static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1371 struct tcmsg *tcm, struct netlink_callback *cb,
1372 int *t_p, int s_t)
1373{
1374 struct Qdisc *q;
1375
1376 if (!root)
1377 return 0;
1378
1379 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1380 return -1;
1381
1382 list_for_each_entry(q, &root->list, list) {
1383 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1384 return -1;
1385 }
1386
1387 return 0;
1388}
1389
1da177e4
LT
1390static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1391{
30723673 1392 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
3b1e0a65 1393 struct net *net = sock_net(skb->sk);
30723673 1394 struct netdev_queue *dev_queue;
1da177e4 1395 struct net_device *dev;
30723673 1396 int t, s_t;
1da177e4 1397
b854272b
DL
1398 if (net != &init_net)
1399 return 0;
1400
1da177e4
LT
1401 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1402 return 0;
881d966b 1403 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
1404 return 0;
1405
1406 s_t = cb->args[0];
1407 t = 0;
1408
30723673
DM
1409 dev_queue = netdev_get_tx_queue(dev, 0);
1410 if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
1411 goto done;
1412
1413 dev_queue = &dev->rx_queue;
1414 if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
1415 goto done;
1da177e4 1416
30723673 1417done:
1da177e4
LT
1418 cb->args[0] = t;
1419
1420 dev_put(dev);
1421 return skb->len;
1422}
1423
1424/* Main classifier routine: scans classifier chain attached
1425 to this qdisc, (optionally) tests for protocol and asks
1426 specific classifiers.
1427 */
73ca4918
PM
1428int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1429 struct tcf_result *res)
1430{
1431 __be16 protocol = skb->protocol;
1432 int err = 0;
1433
1434 for (; tp; tp = tp->next) {
1435 if ((tp->protocol == protocol ||
1436 tp->protocol == htons(ETH_P_ALL)) &&
1437 (err = tp->classify(skb, tp, res)) >= 0) {
1438#ifdef CONFIG_NET_CLS_ACT
1439 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1440 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1441#endif
1442 return err;
1443 }
1444 }
1445 return -1;
1446}
1447EXPORT_SYMBOL(tc_classify_compat);
1448
/* Classify a packet, honouring TC_ACT_RECLASSIFY verdicts: restart the
 * chain from the beginning, bounded by MAX_REC_LOOP iterations (tracked
 * in skb->tc_verd) to break reclassification loops.
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
	__be16 protocol;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;	/* head of chain, for restarts */
reclassify:
#endif
	protocol = skb->protocol;

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		/* Too many restarts: assume a loop and drop the packet. */
		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		/* Remember the incremented restart count in the skb. */
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
1da177e4 1479
a48b5a61
PM
/* Destroy a single filter: run its teardown hook, release the module
 * reference pinning its ops, then free the filter itself.
 */
void tcf_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree(tp);
}
1486
ff31ab56 1487void tcf_destroy_chain(struct tcf_proto **fl)
a48b5a61
PM
1488{
1489 struct tcf_proto *tp;
1490
ff31ab56
PM
1491 while ((tp = *fl) != NULL) {
1492 *fl = tp->next;
a48b5a61
PM
1493 tcf_destroy(tp);
1494 }
1495}
1496EXPORT_SYMBOL(tcf_destroy_chain);
1497
1da177e4
LT
1498#ifdef CONFIG_PROC_FS
1499static int psched_show(struct seq_file *seq, void *v)
1500{
3c0cfc13
PM
1501 struct timespec ts;
1502
1503 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
1da177e4 1504 seq_printf(seq, "%08x %08x %08x %08x\n",
641b9e0e 1505 (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
514bca32 1506 1000000,
3c0cfc13 1507 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
1da177e4
LT
1508
1509 return 0;
1510}
1511
/* Open handler for /proc/net/psched: single-shot seq_file. */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}
1516
/* File operations for /proc/net/psched (read-only, seq_file backed). */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1da177e4
LT
1524#endif
1525
1da177e4
LT
/* Packet scheduler subsystem init: register the built-in FIFO qdiscs,
 * create /proc/net/psched, and hook the qdisc/class rtnetlink message
 * types to their handlers.
 */
static int __init pktsched_init(void)
{
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

	return 0;
}

subsys_initcall(pktsched_init);