]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/sched/sch_api.c
pkt_sched: Fix return value corruption in HTB and TBF.
[mirror_ubuntu-artful-kernel.git] / net / sched / sch_api.c
CommitLineData
1da177e4
LT
1/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
1da177e4 21#include <linux/string.h>
1da177e4 22#include <linux/errno.h>
1da177e4 23#include <linux/skbuff.h>
1da177e4
LT
24#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
4179477f 29#include <linux/hrtimer.h>
1da177e4 30
457c4cbc 31#include <net/net_namespace.h>
b854272b 32#include <net/sock.h>
dc5fc579 33#include <net/netlink.h>
1da177e4
LT
34#include <net/pkt_sched.h>
35
1da177e4
LT
36static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
37 struct Qdisc *old, struct Qdisc *new);
38static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
39 struct Qdisc *q, unsigned long cl, int event);
40
41/*
42
43 Short review.
44 -------------
45
46 This file consists of two interrelated parts:
47
48 1. queueing disciplines manager frontend.
49 2. traffic classes manager frontend.
50
51 Generally, queueing discipline ("qdisc") is a black box,
52 which is able to enqueue packets and to dequeue them (when
53 device is ready to send something) in order and at times
54 determined by algorithm hidden in it.
55
56 qdisc's are divided to two categories:
57 - "queues", which have no internal structure visible from outside.
58 - "schedulers", which split all the packets to "traffic classes",
59 using "packet classifiers" (look at cls_api.c)
60
61 In turn, classes may have child qdiscs (as rule, queues)
62 attached to them etc. etc. etc.
63
64 The goal of the routines in this file is to translate
65 information supplied by user in the form of handles
66 to more intelligible for kernel form, to make some sanity
67 checks and part of work, which is common to all qdiscs
68 and to provide rtnetlink notifications.
69
70 All real intelligent work is done inside qdisc modules.
71
72
73
74 Every discipline has two major routines: enqueue and dequeue.
75
76 ---dequeue
77
78 dequeue usually returns a skb to send. It is allowed to return NULL,
79 but it does not mean that queue is empty, it just means that
80 discipline does not want to send anything this time.
81 Queue is really empty if q->q.qlen == 0.
82 For complicated disciplines with multiple queues q->q is not
83 real packet queue, but however q->q.qlen must be valid.
84
85 ---enqueue
86
87 enqueue returns 0, if packet was enqueued successfully.
88 If a packet (this one or another one) was dropped, it returns
89 a non-zero error code.
90 NET_XMIT_DROP - this packet dropped
91 Expected action: do not backoff, but wait until queue will clear.
92 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
93 Expected action: backoff or ignore
94 NET_XMIT_POLICED - dropped by police.
95 Expected action: backoff or error to real-time apps.
96
97 Auxiliary routines:
98
99 ---requeue
100
101 requeues once dequeued packet. It is used for non-standard or
e65d22e1 102 just buggy devices, which can defer output even if netif_queue_stopped()=0.
1da177e4
LT
103
104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
122/* Protects list of registered TC modules. It is pure SMP lock. */
123static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
131/* The list of all installed queueing disciplines. */
132
133static struct Qdisc_ops *qdisc_base;
134
135/* Register/unregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
149 if (qops->requeue == NULL)
150 qops->requeue = noop_qdisc_ops.requeue;
151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
154 qops->next = NULL;
155 *qp = qops;
156 rc = 0;
157out:
158 write_unlock(&qdisc_mod_lock);
159 return rc;
160}
62e3ba1b 161EXPORT_SYMBOL(register_qdisc);
1da177e4
LT
162
163int unregister_qdisc(struct Qdisc_ops *qops)
164{
165 struct Qdisc_ops *q, **qp;
166 int err = -ENOENT;
167
168 write_lock(&qdisc_mod_lock);
169 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
170 if (q == qops)
171 break;
172 if (q) {
173 *qp = q->next;
174 q->next = NULL;
175 err = 0;
176 }
177 write_unlock(&qdisc_mod_lock);
178 return err;
179}
62e3ba1b 180EXPORT_SYMBOL(unregister_qdisc);
1da177e4
LT
181
182/* We know handle. Find qdisc among all qdisc's attached to device
183 (root qdisc, all its children, children of children etc.)
184 */
185
8123b421
DM
186struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
187{
188 struct Qdisc *q;
189
190 if (!(root->flags & TCQ_F_BUILTIN) &&
191 root->handle == handle)
192 return root;
193
194 list_for_each_entry(q, &root->list, list) {
195 if (q->handle == handle)
196 return q;
197 }
198 return NULL;
199}
200
ead81cc5 201struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
1da177e4 202{
30723673
DM
203 unsigned int i;
204
205 for (i = 0; i < dev->num_tx_queues; i++) {
206 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
827ebd64 207 struct Qdisc *q, *txq_root = txq->qdisc_sleeping;
1da177e4 208
8123b421
DM
209 q = qdisc_match_from_root(txq_root, handle);
210 if (q)
211 return q;
1da177e4 212 }
8123b421 213 return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
1da177e4
LT
214}
215
216static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
217{
218 unsigned long cl;
219 struct Qdisc *leaf;
20fea08b 220 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
1da177e4
LT
221
222 if (cops == NULL)
223 return NULL;
224 cl = cops->get(p, classid);
225
226 if (cl == 0)
227 return NULL;
228 leaf = cops->leaf(p, cl);
229 cops->put(p, cl);
230 return leaf;
231}
232
233/* Find queueing discipline by name */
234
1e90474c 235static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
1da177e4
LT
236{
237 struct Qdisc_ops *q = NULL;
238
239 if (kind) {
240 read_lock(&qdisc_mod_lock);
241 for (q = qdisc_base; q; q = q->next) {
1e90474c 242 if (nla_strcmp(kind, q->id) == 0) {
1da177e4
LT
243 if (!try_module_get(q->owner))
244 q = NULL;
245 break;
246 }
247 }
248 read_unlock(&qdisc_mod_lock);
249 }
250 return q;
251}
252
253static struct qdisc_rate_table *qdisc_rtab_list;
254
1e90474c 255struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
1da177e4
LT
256{
257 struct qdisc_rate_table *rtab;
258
259 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
260 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
261 rtab->refcnt++;
262 return rtab;
263 }
264 }
265
5feb5e1a
PM
266 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
267 nla_len(tab) != TC_RTAB_SIZE)
1da177e4
LT
268 return NULL;
269
270 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
271 if (rtab) {
272 rtab->rate = *r;
273 rtab->refcnt = 1;
1e90474c 274 memcpy(rtab->data, nla_data(tab), 1024);
1da177e4
LT
275 rtab->next = qdisc_rtab_list;
276 qdisc_rtab_list = rtab;
277 }
278 return rtab;
279}
62e3ba1b 280EXPORT_SYMBOL(qdisc_get_rtab);
1da177e4
LT
281
282void qdisc_put_rtab(struct qdisc_rate_table *tab)
283{
284 struct qdisc_rate_table *rtab, **rtabp;
285
286 if (!tab || --tab->refcnt)
287 return;
288
289 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
290 if (rtab == tab) {
291 *rtabp = rtab->next;
292 kfree(rtab);
293 return;
294 }
295 }
296}
62e3ba1b 297EXPORT_SYMBOL(qdisc_put_rtab);
1da177e4 298
175f9c1b
JK
299static LIST_HEAD(qdisc_stab_list);
300static DEFINE_SPINLOCK(qdisc_stab_lock);
301
302static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
303 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
304 [TCA_STAB_DATA] = { .type = NLA_BINARY },
305};
306
307static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
308{
309 struct nlattr *tb[TCA_STAB_MAX + 1];
310 struct qdisc_size_table *stab;
311 struct tc_sizespec *s;
312 unsigned int tsize = 0;
313 u16 *tab = NULL;
314 int err;
315
316 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
317 if (err < 0)
318 return ERR_PTR(err);
319 if (!tb[TCA_STAB_BASE])
320 return ERR_PTR(-EINVAL);
321
322 s = nla_data(tb[TCA_STAB_BASE]);
323
324 if (s->tsize > 0) {
325 if (!tb[TCA_STAB_DATA])
326 return ERR_PTR(-EINVAL);
327 tab = nla_data(tb[TCA_STAB_DATA]);
328 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
329 }
330
331 if (!s || tsize != s->tsize || (!tab && tsize > 0))
332 return ERR_PTR(-EINVAL);
333
1cfa2666 334 spin_lock_bh(&qdisc_stab_lock);
175f9c1b
JK
335
336 list_for_each_entry(stab, &qdisc_stab_list, list) {
337 if (memcmp(&stab->szopts, s, sizeof(*s)))
338 continue;
339 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
340 continue;
341 stab->refcnt++;
1cfa2666 342 spin_unlock_bh(&qdisc_stab_lock);
175f9c1b
JK
343 return stab;
344 }
345
1cfa2666 346 spin_unlock_bh(&qdisc_stab_lock);
175f9c1b
JK
347
348 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
349 if (!stab)
350 return ERR_PTR(-ENOMEM);
351
352 stab->refcnt = 1;
353 stab->szopts = *s;
354 if (tsize > 0)
355 memcpy(stab->data, tab, tsize * sizeof(u16));
356
1cfa2666 357 spin_lock_bh(&qdisc_stab_lock);
175f9c1b 358 list_add_tail(&stab->list, &qdisc_stab_list);
1cfa2666 359 spin_unlock_bh(&qdisc_stab_lock);
175f9c1b
JK
360
361 return stab;
362}
363
364void qdisc_put_stab(struct qdisc_size_table *tab)
365{
366 if (!tab)
367 return;
368
1cfa2666 369 spin_lock_bh(&qdisc_stab_lock);
175f9c1b
JK
370
371 if (--tab->refcnt == 0) {
372 list_del(&tab->list);
373 kfree(tab);
374 }
375
1cfa2666 376 spin_unlock_bh(&qdisc_stab_lock);
175f9c1b
JK
377}
378EXPORT_SYMBOL(qdisc_put_stab);
379
380static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
381{
382 struct nlattr *nest;
383
384 nest = nla_nest_start(skb, TCA_STAB);
385 NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
386 nla_nest_end(skb, nest);
387
388 return skb->len;
389
390nla_put_failure:
391 return -1;
392}
393
394void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
395{
396 int pkt_len, slot;
397
398 pkt_len = skb->len + stab->szopts.overhead;
399 if (unlikely(!stab->szopts.tsize))
400 goto out;
401
402 slot = pkt_len + stab->szopts.cell_align;
403 if (unlikely(slot < 0))
404 slot = 0;
405
406 slot >>= stab->szopts.cell_log;
407 if (likely(slot < stab->szopts.tsize))
408 pkt_len = stab->data[slot];
409 else
410 pkt_len = stab->data[stab->szopts.tsize - 1] *
411 (slot / stab->szopts.tsize) +
412 stab->data[slot % stab->szopts.tsize];
413
414 pkt_len <<= stab->szopts.size_log;
415out:
416 if (unlikely(pkt_len < 1))
417 pkt_len = 1;
418 qdisc_skb_cb(skb)->pkt_len = pkt_len;
419}
420EXPORT_SYMBOL(qdisc_calculate_pkt_len);
421
4179477f
PM
422static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
423{
424 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
425 timer);
426
427 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
11274e5a 428 smp_wmb();
37437bb2 429 __netif_schedule(wd->qdisc);
1936502d 430
4179477f
PM
431 return HRTIMER_NORESTART;
432}
433
434void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
435{
436 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
437 wd->timer.function = qdisc_watchdog;
438 wd->qdisc = qdisc;
439}
440EXPORT_SYMBOL(qdisc_watchdog_init);
441
442void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
443{
444 ktime_t time;
445
446 wd->qdisc->flags |= TCQ_F_THROTTLED;
447 time = ktime_set(0, 0);
448 time = ktime_add_ns(time, PSCHED_US2NS(expires));
449 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
450}
451EXPORT_SYMBOL(qdisc_watchdog_schedule);
452
453void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
454{
455 hrtimer_cancel(&wd->timer);
456 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
457}
458EXPORT_SYMBOL(qdisc_watchdog_cancel);
1da177e4 459
a94f779f 460static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
6fe1c7a5
PM
461{
462 unsigned int size = n * sizeof(struct hlist_head), i;
463 struct hlist_head *h;
464
465 if (size <= PAGE_SIZE)
466 h = kmalloc(size, GFP_KERNEL);
467 else
468 h = (struct hlist_head *)
469 __get_free_pages(GFP_KERNEL, get_order(size));
470
471 if (h != NULL) {
472 for (i = 0; i < n; i++)
473 INIT_HLIST_HEAD(&h[i]);
474 }
475 return h;
476}
477
478static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
479{
480 unsigned int size = n * sizeof(struct hlist_head);
481
482 if (size <= PAGE_SIZE)
483 kfree(h);
484 else
485 free_pages((unsigned long)h, get_order(size));
486}
487
488void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
489{
490 struct Qdisc_class_common *cl;
491 struct hlist_node *n, *next;
492 struct hlist_head *nhash, *ohash;
493 unsigned int nsize, nmask, osize;
494 unsigned int i, h;
495
496 /* Rehash when load factor exceeds 0.75 */
497 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
498 return;
499 nsize = clhash->hashsize * 2;
500 nmask = nsize - 1;
501 nhash = qdisc_class_hash_alloc(nsize);
502 if (nhash == NULL)
503 return;
504
505 ohash = clhash->hash;
506 osize = clhash->hashsize;
507
508 sch_tree_lock(sch);
509 for (i = 0; i < osize; i++) {
510 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
511 h = qdisc_class_hash(cl->classid, nmask);
512 hlist_add_head(&cl->hnode, &nhash[h]);
513 }
514 }
515 clhash->hash = nhash;
516 clhash->hashsize = nsize;
517 clhash->hashmask = nmask;
518 sch_tree_unlock(sch);
519
520 qdisc_class_hash_free(ohash, osize);
521}
522EXPORT_SYMBOL(qdisc_class_hash_grow);
523
524int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
525{
526 unsigned int size = 4;
527
528 clhash->hash = qdisc_class_hash_alloc(size);
529 if (clhash->hash == NULL)
530 return -ENOMEM;
531 clhash->hashsize = size;
532 clhash->hashmask = size - 1;
533 clhash->hashelems = 0;
534 return 0;
535}
536EXPORT_SYMBOL(qdisc_class_hash_init);
537
538void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
539{
540 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
541}
542EXPORT_SYMBOL(qdisc_class_hash_destroy);
543
544void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
545 struct Qdisc_class_common *cl)
546{
547 unsigned int h;
548
549 INIT_HLIST_NODE(&cl->hnode);
550 h = qdisc_class_hash(cl->classid, clhash->hashmask);
551 hlist_add_head(&cl->hnode, &clhash->hash[h]);
552 clhash->hashelems++;
553}
554EXPORT_SYMBOL(qdisc_class_hash_insert);
555
556void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
557 struct Qdisc_class_common *cl)
558{
559 hlist_del(&cl->hnode);
560 clhash->hashelems--;
561}
562EXPORT_SYMBOL(qdisc_class_hash_remove);
563
1da177e4
LT
564/* Allocate a unique handle from space managed by kernel */
565
566static u32 qdisc_alloc_handle(struct net_device *dev)
567{
568 int i = 0x10000;
569 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
570
571 do {
572 autohandle += TC_H_MAKE(0x10000U, 0);
573 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
574 autohandle = TC_H_MAKE(0x80000000U, 0);
575 } while (qdisc_lookup(dev, autohandle) && --i > 0);
576
577 return i>0 ? autohandle : 0;
578}
579
99194cff 580/* Attach toplevel qdisc to device queue. */
1da177e4 581
99194cff
DM
582static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
583 struct Qdisc *qdisc)
1da177e4 584{
8d50b53d 585 struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
53049978 586 spinlock_t *root_lock;
53049978
DM
587
588 root_lock = qdisc_root_lock(oqdisc);
589 spin_lock_bh(root_lock);
590
8d50b53d
DM
591 /* Prune old scheduler */
592 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
593 qdisc_reset(oqdisc);
1da177e4 594
8d50b53d
DM
595 /* ... and graft new one */
596 if (qdisc == NULL)
597 qdisc = &noop_qdisc;
598 dev_queue->qdisc_sleeping = qdisc;
599 dev_queue->qdisc = &noop_qdisc;
1da177e4 600
53049978 601 spin_unlock_bh(root_lock);
1da177e4 602
1da177e4
LT
603 return oqdisc;
604}
605
43effa1e
PM
606void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
607{
20fea08b 608 const struct Qdisc_class_ops *cops;
43effa1e
PM
609 unsigned long cl;
610 u32 parentid;
611
612 if (n == 0)
613 return;
614 while ((parentid = sch->parent)) {
066a3b5b
JP
615 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
616 return;
617
5ce2d488 618 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
ffc8fefa
PM
619 if (sch == NULL) {
620 WARN_ON(parentid != TC_H_ROOT);
621 return;
622 }
43effa1e
PM
623 cops = sch->ops->cl_ops;
624 if (cops->qlen_notify) {
625 cl = cops->get(sch, parentid);
626 cops->qlen_notify(sch, cl);
627 cops->put(sch, cl);
628 }
629 sch->q.qlen -= n;
630 }
631}
632EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
1da177e4 633
99194cff
DM
634static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
635 struct Qdisc *old, struct Qdisc *new)
636{
637 if (new || old)
638 qdisc_notify(skb, n, clid, old, new);
1da177e4 639
99194cff 640 if (old) {
3a76e371 641 sch_tree_lock(old);
99194cff 642 qdisc_destroy(old);
3a76e371 643 sch_tree_unlock(old);
99194cff
DM
644 }
645}
646
647/* Graft qdisc "new" to class "classid" of qdisc "parent" or
648 * to device "dev".
649 *
650 * When appropriate send a netlink notification using 'skb'
651 * and "n".
652 *
653 * On success, destroy old qdisc.
1da177e4
LT
654 */
655
656static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
99194cff
DM
657 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
658 struct Qdisc *new, struct Qdisc *old)
1da177e4 659{
99194cff 660 struct Qdisc *q = old;
1da177e4 661 int err = 0;
1da177e4 662
10297b99 663 if (parent == NULL) {
99194cff
DM
664 unsigned int i, num_q, ingress;
665
666 ingress = 0;
667 num_q = dev->num_tx_queues;
8d50b53d
DM
668 if ((q && q->flags & TCQ_F_INGRESS) ||
669 (new && new->flags & TCQ_F_INGRESS)) {
99194cff
DM
670 num_q = 1;
671 ingress = 1;
672 }
673
674 if (dev->flags & IFF_UP)
675 dev_deactivate(dev);
676
677 for (i = 0; i < num_q; i++) {
678 struct netdev_queue *dev_queue = &dev->rx_queue;
679
680 if (!ingress)
681 dev_queue = netdev_get_tx_queue(dev, i);
682
8d50b53d
DM
683 old = dev_graft_qdisc(dev_queue, new);
684 if (new && i > 0)
685 atomic_inc(&new->refcnt);
686
99194cff 687 notify_and_destroy(skb, n, classid, old, new);
1da177e4 688 }
99194cff
DM
689
690 if (dev->flags & IFF_UP)
691 dev_activate(dev);
1da177e4 692 } else {
20fea08b 693 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1da177e4
LT
694
695 err = -EINVAL;
696
697 if (cops) {
698 unsigned long cl = cops->get(parent, classid);
699 if (cl) {
99194cff 700 err = cops->graft(parent, cl, new, &old);
1da177e4
LT
701 cops->put(parent, cl);
702 }
703 }
99194cff
DM
704 if (!err)
705 notify_and_destroy(skb, n, classid, old, new);
1da177e4
LT
706 }
707 return err;
708}
709
710/*
711 Allocate and initialize new qdisc.
712
713 Parameters are passed via opt.
714 */
715
716static struct Qdisc *
bb949fbd
DM
717qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
718 u32 parent, u32 handle, struct nlattr **tca, int *errp)
1da177e4
LT
719{
720 int err;
1e90474c 721 struct nlattr *kind = tca[TCA_KIND];
1da177e4
LT
722 struct Qdisc *sch;
723 struct Qdisc_ops *ops;
175f9c1b 724 struct qdisc_size_table *stab;
1da177e4
LT
725
726 ops = qdisc_lookup_ops(kind);
727#ifdef CONFIG_KMOD
728 if (ops == NULL && kind != NULL) {
729 char name[IFNAMSIZ];
1e90474c 730 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1da177e4
LT
731 /* We dropped the RTNL semaphore in order to
732 * perform the module load. So, even if we
733 * succeeded in loading the module we have to
734 * tell the caller to replay the request. We
735 * indicate this using -EAGAIN.
736 * We replay the request because the device may
737 * go away in the mean time.
738 */
739 rtnl_unlock();
740 request_module("sch_%s", name);
741 rtnl_lock();
742 ops = qdisc_lookup_ops(kind);
743 if (ops != NULL) {
744 /* We will try again qdisc_lookup_ops,
745 * so don't keep a reference.
746 */
747 module_put(ops->owner);
748 err = -EAGAIN;
749 goto err_out;
750 }
751 }
752 }
753#endif
754
b9e2cc0f 755 err = -ENOENT;
1da177e4
LT
756 if (ops == NULL)
757 goto err_out;
758
5ce2d488 759 sch = qdisc_alloc(dev_queue, ops);
3d54b82f
TG
760 if (IS_ERR(sch)) {
761 err = PTR_ERR(sch);
1da177e4 762 goto err_out2;
3d54b82f 763 }
1da177e4 764
ffc8fefa
PM
765 sch->parent = parent;
766
3d54b82f 767 if (handle == TC_H_INGRESS) {
1da177e4 768 sch->flags |= TCQ_F_INGRESS;
3d54b82f 769 handle = TC_H_MAKE(TC_H_INGRESS, 0);
fd44de7c 770 } else {
fd44de7c
PM
771 if (handle == 0) {
772 handle = qdisc_alloc_handle(dev);
773 err = -ENOMEM;
774 if (handle == 0)
775 goto err_out3;
776 }
1da177e4
LT
777 }
778
3d54b82f 779 sch->handle = handle;
1da177e4 780
1e90474c 781 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
175f9c1b
JK
782 if (tca[TCA_STAB]) {
783 stab = qdisc_get_stab(tca[TCA_STAB]);
784 if (IS_ERR(stab)) {
785 err = PTR_ERR(stab);
786 goto err_out3;
787 }
788 sch->stab = stab;
789 }
1e90474c 790 if (tca[TCA_RATE]) {
023e09a7 791 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
7698b4fc 792 qdisc_root_lock(sch),
1e90474c 793 tca[TCA_RATE]);
023e09a7
TG
794 if (err) {
795 /*
796 * Any broken qdiscs that would require
797 * a ops->reset() here? The qdisc was never
798 * in action so it shouldn't be necessary.
799 */
800 if (ops->destroy)
801 ops->destroy(sch);
802 goto err_out3;
803 }
804 }
ee7af826 805 if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS))
827ebd64 806 list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list);
1da177e4 807
1da177e4
LT
808 return sch;
809 }
810err_out3:
175f9c1b 811 qdisc_put_stab(sch->stab);
1da177e4 812 dev_put(dev);
3d54b82f 813 kfree((char *) sch - sch->padded);
1da177e4
LT
814err_out2:
815 module_put(ops->owner);
816err_out:
817 *errp = err;
1da177e4
LT
818 return NULL;
819}
820
1e90474c 821static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1da177e4 822{
175f9c1b
JK
823 struct qdisc_size_table *stab = NULL;
824 int err = 0;
1da177e4 825
175f9c1b 826 if (tca[TCA_OPTIONS]) {
1da177e4
LT
827 if (sch->ops->change == NULL)
828 return -EINVAL;
1e90474c 829 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1da177e4
LT
830 if (err)
831 return err;
832 }
175f9c1b
JK
833
834 if (tca[TCA_STAB]) {
835 stab = qdisc_get_stab(tca[TCA_STAB]);
836 if (IS_ERR(stab))
837 return PTR_ERR(stab);
838 }
839
840 qdisc_put_stab(sch->stab);
841 sch->stab = stab;
842
1e90474c 843 if (tca[TCA_RATE])
1da177e4 844 gen_replace_estimator(&sch->bstats, &sch->rate_est,
7698b4fc 845 qdisc_root_lock(sch), tca[TCA_RATE]);
1da177e4
LT
846 return 0;
847}
848
849struct check_loop_arg
850{
851 struct qdisc_walker w;
852 struct Qdisc *p;
853 int depth;
854};
855
856static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
857
858static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
859{
860 struct check_loop_arg arg;
861
862 if (q->ops->cl_ops == NULL)
863 return 0;
864
865 arg.w.stop = arg.w.skip = arg.w.count = 0;
866 arg.w.fn = check_loop_fn;
867 arg.depth = depth;
868 arg.p = p;
869 q->ops->cl_ops->walk(q, &arg.w);
870 return arg.w.stop ? -ELOOP : 0;
871}
872
873static int
874check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
875{
876 struct Qdisc *leaf;
20fea08b 877 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1da177e4
LT
878 struct check_loop_arg *arg = (struct check_loop_arg *)w;
879
880 leaf = cops->leaf(q, cl);
881 if (leaf) {
882 if (leaf == arg->p || arg->depth > 7)
883 return -ELOOP;
884 return check_loop(leaf, arg->p, arg->depth + 1);
885 }
886 return 0;
887}
888
889/*
890 * Delete/get qdisc.
891 */
892
893static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
894{
3b1e0a65 895 struct net *net = sock_net(skb->sk);
1da177e4 896 struct tcmsg *tcm = NLMSG_DATA(n);
1e90474c 897 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
898 struct net_device *dev;
899 u32 clid = tcm->tcm_parent;
900 struct Qdisc *q = NULL;
901 struct Qdisc *p = NULL;
902 int err;
903
b854272b
DL
904 if (net != &init_net)
905 return -EINVAL;
906
881d966b 907 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
908 return -ENODEV;
909
1e90474c
PM
910 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
911 if (err < 0)
912 return err;
913
1da177e4
LT
914 if (clid) {
915 if (clid != TC_H_ROOT) {
916 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
917 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
918 return -ENOENT;
919 q = qdisc_leaf(p, clid);
920 } else { /* ingress */
8123b421 921 q = dev->rx_queue.qdisc_sleeping;
10297b99 922 }
1da177e4 923 } else {
e8a0464c
DM
924 struct netdev_queue *dev_queue;
925 dev_queue = netdev_get_tx_queue(dev, 0);
b0e1e646 926 q = dev_queue->qdisc_sleeping;
1da177e4
LT
927 }
928 if (!q)
929 return -ENOENT;
930
931 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
932 return -EINVAL;
933 } else {
934 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
935 return -ENOENT;
936 }
937
1e90474c 938 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
939 return -EINVAL;
940
941 if (n->nlmsg_type == RTM_DELQDISC) {
942 if (!clid)
943 return -EINVAL;
944 if (q->handle == 0)
945 return -ENOENT;
99194cff 946 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
1da177e4 947 return err;
1da177e4
LT
948 } else {
949 qdisc_notify(skb, n, clid, NULL, q);
950 }
951 return 0;
952}
953
954/*
955 Create/change qdisc.
956 */
957
958static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
959{
3b1e0a65 960 struct net *net = sock_net(skb->sk);
1da177e4 961 struct tcmsg *tcm;
1e90474c 962 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
963 struct net_device *dev;
964 u32 clid;
965 struct Qdisc *q, *p;
966 int err;
967
b854272b
DL
968 if (net != &init_net)
969 return -EINVAL;
970
1da177e4
LT
971replay:
972 /* Reinit, just in case something touches this. */
973 tcm = NLMSG_DATA(n);
1da177e4
LT
974 clid = tcm->tcm_parent;
975 q = p = NULL;
976
881d966b 977 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
978 return -ENODEV;
979
1e90474c
PM
980 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
981 if (err < 0)
982 return err;
983
1da177e4
LT
984 if (clid) {
985 if (clid != TC_H_ROOT) {
986 if (clid != TC_H_INGRESS) {
987 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
988 return -ENOENT;
989 q = qdisc_leaf(p, clid);
990 } else { /*ingress */
8123b421 991 q = dev->rx_queue.qdisc_sleeping;
1da177e4
LT
992 }
993 } else {
e8a0464c
DM
994 struct netdev_queue *dev_queue;
995 dev_queue = netdev_get_tx_queue(dev, 0);
b0e1e646 996 q = dev_queue->qdisc_sleeping;
1da177e4
LT
997 }
998
999 /* It may be default qdisc, ignore it */
1000 if (q && q->handle == 0)
1001 q = NULL;
1002
1003 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1004 if (tcm->tcm_handle) {
1005 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
1006 return -EEXIST;
1007 if (TC_H_MIN(tcm->tcm_handle))
1008 return -EINVAL;
1009 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
1010 goto create_n_graft;
1011 if (n->nlmsg_flags&NLM_F_EXCL)
1012 return -EEXIST;
1e90474c 1013 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
1014 return -EINVAL;
1015 if (q == p ||
1016 (p && check_loop(q, p, 0)))
1017 return -ELOOP;
1018 atomic_inc(&q->refcnt);
1019 goto graft;
1020 } else {
1021 if (q == NULL)
1022 goto create_n_graft;
1023
1024 /* This magic test requires explanation.
1025 *
1026 * We know, that some child q is already
1027 * attached to this parent and have choice:
1028 * either to change it or to create/graft new one.
1029 *
1030 * 1. We are allowed to create/graft only
1031 * if CREATE and REPLACE flags are set.
1032 *
1033 * 2. If EXCL is set, requestor wanted to say,
1034 * that qdisc tcm_handle is not expected
1035 * to exist, so that we choose create/graft too.
1036 *
1037 * 3. The last case is when no flags are set.
1038 * Alas, it is sort of hole in API, we
1039 * cannot decide what to do unambiguously.
1040 * For now we select create/graft, if
1041 * user gave KIND, which does not match existing.
1042 */
1043 if ((n->nlmsg_flags&NLM_F_CREATE) &&
1044 (n->nlmsg_flags&NLM_F_REPLACE) &&
1045 ((n->nlmsg_flags&NLM_F_EXCL) ||
1e90474c
PM
1046 (tca[TCA_KIND] &&
1047 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1da177e4
LT
1048 goto create_n_graft;
1049 }
1050 }
1051 } else {
1052 if (!tcm->tcm_handle)
1053 return -EINVAL;
1054 q = qdisc_lookup(dev, tcm->tcm_handle);
1055 }
1056
1057 /* Change qdisc parameters */
1058 if (q == NULL)
1059 return -ENOENT;
1060 if (n->nlmsg_flags&NLM_F_EXCL)
1061 return -EEXIST;
1e90474c 1062 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4
LT
1063 return -EINVAL;
1064 err = qdisc_change(q, tca);
1065 if (err == 0)
1066 qdisc_notify(skb, n, clid, NULL, q);
1067 return err;
1068
1069create_n_graft:
1070 if (!(n->nlmsg_flags&NLM_F_CREATE))
1071 return -ENOENT;
1072 if (clid == TC_H_INGRESS)
bb949fbd
DM
1073 q = qdisc_create(dev, &dev->rx_queue,
1074 tcm->tcm_parent, tcm->tcm_parent,
ffc8fefa 1075 tca, &err);
10297b99 1076 else
e8a0464c 1077 q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
bb949fbd 1078 tcm->tcm_parent, tcm->tcm_handle,
ffc8fefa 1079 tca, &err);
1da177e4
LT
1080 if (q == NULL) {
1081 if (err == -EAGAIN)
1082 goto replay;
1083 return err;
1084 }
1085
1086graft:
1087 if (1) {
53049978
DM
1088 spinlock_t *root_lock;
1089
99194cff 1090 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1da177e4
LT
1091 if (err) {
1092 if (q) {
53049978
DM
1093 root_lock = qdisc_root_lock(q);
1094 spin_lock_bh(root_lock);
1da177e4 1095 qdisc_destroy(q);
53049978 1096 spin_unlock_bh(root_lock);
1da177e4
LT
1097 }
1098 return err;
1099 }
1da177e4
LT
1100 }
1101 return 0;
1102}
1103
1104static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
e431b8c0 1105 u32 pid, u32 seq, u16 flags, int event)
1da177e4
LT
1106{
1107 struct tcmsg *tcm;
1108 struct nlmsghdr *nlh;
27a884dc 1109 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
1110 struct gnet_dump d;
1111
e431b8c0 1112 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1da177e4
LT
1113 tcm = NLMSG_DATA(nlh);
1114 tcm->tcm_family = AF_UNSPEC;
9ef1d4c7
PM
1115 tcm->tcm__pad1 = 0;
1116 tcm->tcm__pad2 = 0;
5ce2d488 1117 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1da177e4
LT
1118 tcm->tcm_parent = clid;
1119 tcm->tcm_handle = q->handle;
1120 tcm->tcm_info = atomic_read(&q->refcnt);
57e1c487 1121 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
1da177e4 1122 if (q->ops->dump && q->ops->dump(q, skb) < 0)
1e90474c 1123 goto nla_put_failure;
1da177e4
LT
1124 q->qstats.qlen = q->q.qlen;
1125
175f9c1b
JK
1126 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
1127 goto nla_put_failure;
1128
1da177e4 1129 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
7698b4fc 1130 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
1e90474c 1131 goto nla_put_failure;
1da177e4
LT
1132
1133 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
1e90474c 1134 goto nla_put_failure;
1da177e4
LT
1135
1136 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
1da177e4 1137 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
1da177e4 1138 gnet_stats_copy_queue(&d, &q->qstats) < 0)
1e90474c 1139 goto nla_put_failure;
10297b99 1140
1da177e4 1141 if (gnet_stats_finish_copy(&d) < 0)
1e90474c 1142 goto nla_put_failure;
10297b99 1143
27a884dc 1144 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1da177e4
LT
1145 return skb->len;
1146
1147nlmsg_failure:
1e90474c 1148nla_put_failure:
dc5fc579 1149 nlmsg_trim(skb, b);
1da177e4
LT
1150 return -1;
1151}
1152
1153static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1154 u32 clid, struct Qdisc *old, struct Qdisc *new)
1155{
1156 struct sk_buff *skb;
1157 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1158
1159 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1160 if (!skb)
1161 return -ENOBUFS;
1162
1163 if (old && old->handle) {
1164 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1165 goto err_out;
1166 }
1167 if (new) {
1168 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1169 goto err_out;
1170 }
1171
1172 if (skb->len)
97c53cac 1173 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1da177e4
LT
1174
1175err_out:
1176 kfree_skb(skb);
1177 return -EINVAL;
1178}
1179
30723673
DM
1180static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1181{
1182 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1183}
1184
1185static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1186 struct netlink_callback *cb,
1187 int *q_idx_p, int s_q_idx)
1188{
1189 int ret = 0, q_idx = *q_idx_p;
1190 struct Qdisc *q;
1191
1192 if (!root)
1193 return 0;
1194
1195 q = root;
1196 if (q_idx < s_q_idx) {
1197 q_idx++;
1198 } else {
1199 if (!tc_qdisc_dump_ignore(q) &&
1200 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1201 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1202 goto done;
1203 q_idx++;
1204 }
1205 list_for_each_entry(q, &root->list, list) {
1206 if (q_idx < s_q_idx) {
1207 q_idx++;
1208 continue;
1209 }
1210 if (!tc_qdisc_dump_ignore(q) &&
1211 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1212 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1213 goto done;
1214 q_idx++;
1215 }
1216
1217out:
1218 *q_idx_p = q_idx;
1219 return ret;
1220done:
1221 ret = -1;
1222 goto out;
1223}
1224
1da177e4
LT
1225static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1226{
3b1e0a65 1227 struct net *net = sock_net(skb->sk);
1da177e4
LT
1228 int idx, q_idx;
1229 int s_idx, s_q_idx;
1230 struct net_device *dev;
1da177e4 1231
b854272b
DL
1232 if (net != &init_net)
1233 return 0;
1234
1da177e4
LT
1235 s_idx = cb->args[0];
1236 s_q_idx = q_idx = cb->args[1];
1237 read_lock(&dev_base_lock);
7562f876 1238 idx = 0;
881d966b 1239 for_each_netdev(&init_net, dev) {
30723673
DM
1240 struct netdev_queue *dev_queue;
1241
1da177e4 1242 if (idx < s_idx)
7562f876 1243 goto cont;
1da177e4
LT
1244 if (idx > s_idx)
1245 s_q_idx = 0;
1da177e4 1246 q_idx = 0;
30723673
DM
1247
1248 dev_queue = netdev_get_tx_queue(dev, 0);
827ebd64 1249 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
30723673
DM
1250 goto done;
1251
1252 dev_queue = &dev->rx_queue;
827ebd64 1253 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
30723673
DM
1254 goto done;
1255
7562f876
PE
1256cont:
1257 idx++;
1da177e4
LT
1258 }
1259
1260done:
1261 read_unlock(&dev_base_lock);
1262
1263 cb->args[0] = idx;
1264 cb->args[1] = q_idx;
1265
1266 return skb->len;
1267}
1268
1269
1270
1271/************************************************
1272 * Traffic classes manipulation. *
1273 ************************************************/
1274
1275
1276
1277static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1278{
3b1e0a65 1279 struct net *net = sock_net(skb->sk);
b0e1e646 1280 struct netdev_queue *dev_queue;
1da177e4 1281 struct tcmsg *tcm = NLMSG_DATA(n);
1e90474c 1282 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1283 struct net_device *dev;
1284 struct Qdisc *q = NULL;
20fea08b 1285 const struct Qdisc_class_ops *cops;
1da177e4
LT
1286 unsigned long cl = 0;
1287 unsigned long new_cl;
1288 u32 pid = tcm->tcm_parent;
1289 u32 clid = tcm->tcm_handle;
1290 u32 qid = TC_H_MAJ(clid);
1291 int err;
1292
b854272b
DL
1293 if (net != &init_net)
1294 return -EINVAL;
1295
881d966b 1296 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
1297 return -ENODEV;
1298
1e90474c
PM
1299 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1300 if (err < 0)
1301 return err;
1302
1da177e4
LT
1303 /*
1304 parent == TC_H_UNSPEC - unspecified parent.
1305 parent == TC_H_ROOT - class is root, which has no parent.
1306 parent == X:0 - parent is root class.
1307 parent == X:Y - parent is a node in hierarchy.
1308 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1309
1310 handle == 0:0 - generate handle from kernel pool.
1311 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1312 handle == X:Y - clear.
1313 handle == X:0 - root class.
1314 */
1315
1316 /* Step 1. Determine qdisc handle X:0 */
1317
e8a0464c 1318 dev_queue = netdev_get_tx_queue(dev, 0);
1da177e4
LT
1319 if (pid != TC_H_ROOT) {
1320 u32 qid1 = TC_H_MAJ(pid);
1321
1322 if (qid && qid1) {
1323 /* If both majors are known, they must be identical. */
1324 if (qid != qid1)
1325 return -EINVAL;
1326 } else if (qid1) {
1327 qid = qid1;
1328 } else if (qid == 0)
b0e1e646 1329 qid = dev_queue->qdisc_sleeping->handle;
1da177e4
LT
1330
1331 /* Now qid is genuine qdisc handle consistent
1332 both with parent and child.
1333
1334 TC_H_MAJ(pid) still may be unspecified, complete it now.
1335 */
1336 if (pid)
1337 pid = TC_H_MAKE(qid, pid);
1338 } else {
1339 if (qid == 0)
b0e1e646 1340 qid = dev_queue->qdisc_sleeping->handle;
1da177e4
LT
1341 }
1342
1343 /* OK. Locate qdisc */
10297b99 1344 if ((q = qdisc_lookup(dev, qid)) == NULL)
1da177e4
LT
1345 return -ENOENT;
1346
1347 /* An check that it supports classes */
1348 cops = q->ops->cl_ops;
1349 if (cops == NULL)
1350 return -EINVAL;
1351
1352 /* Now try to get class */
1353 if (clid == 0) {
1354 if (pid == TC_H_ROOT)
1355 clid = qid;
1356 } else
1357 clid = TC_H_MAKE(qid, clid);
1358
1359 if (clid)
1360 cl = cops->get(q, clid);
1361
1362 if (cl == 0) {
1363 err = -ENOENT;
1364 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1365 goto out;
1366 } else {
1367 switch (n->nlmsg_type) {
10297b99 1368 case RTM_NEWTCLASS:
1da177e4
LT
1369 err = -EEXIST;
1370 if (n->nlmsg_flags&NLM_F_EXCL)
1371 goto out;
1372 break;
1373 case RTM_DELTCLASS:
1374 err = cops->delete(q, cl);
1375 if (err == 0)
1376 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1377 goto out;
1378 case RTM_GETTCLASS:
1379 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1380 goto out;
1381 default:
1382 err = -EINVAL;
1383 goto out;
1384 }
1385 }
1386
1387 new_cl = cl;
1388 err = cops->change(q, clid, pid, tca, &new_cl);
1389 if (err == 0)
1390 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1391
1392out:
1393 if (cl)
1394 cops->put(q, cl);
1395
1396 return err;
1397}
1398
1399
1400static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1401 unsigned long cl,
e431b8c0 1402 u32 pid, u32 seq, u16 flags, int event)
1da177e4
LT
1403{
1404 struct tcmsg *tcm;
1405 struct nlmsghdr *nlh;
27a884dc 1406 unsigned char *b = skb_tail_pointer(skb);
1da177e4 1407 struct gnet_dump d;
20fea08b 1408 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1da177e4 1409
e431b8c0 1410 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1da177e4
LT
1411 tcm = NLMSG_DATA(nlh);
1412 tcm->tcm_family = AF_UNSPEC;
5ce2d488 1413 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1da177e4
LT
1414 tcm->tcm_parent = q->handle;
1415 tcm->tcm_handle = q->handle;
1416 tcm->tcm_info = 0;
57e1c487 1417 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
1da177e4 1418 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1e90474c 1419 goto nla_put_failure;
1da177e4
LT
1420
1421 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
7698b4fc 1422 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
1e90474c 1423 goto nla_put_failure;
1da177e4
LT
1424
1425 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1e90474c 1426 goto nla_put_failure;
1da177e4
LT
1427
1428 if (gnet_stats_finish_copy(&d) < 0)
1e90474c 1429 goto nla_put_failure;
1da177e4 1430
27a884dc 1431 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1da177e4
LT
1432 return skb->len;
1433
1434nlmsg_failure:
1e90474c 1435nla_put_failure:
dc5fc579 1436 nlmsg_trim(skb, b);
1da177e4
LT
1437 return -1;
1438}
1439
1440static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1441 struct Qdisc *q, unsigned long cl, int event)
1442{
1443 struct sk_buff *skb;
1444 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1445
1446 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1447 if (!skb)
1448 return -ENOBUFS;
1449
1450 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1451 kfree_skb(skb);
1452 return -EINVAL;
1453 }
1454
97c53cac 1455 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1da177e4
LT
1456}
1457
1458struct qdisc_dump_args
1459{
1460 struct qdisc_walker w;
1461 struct sk_buff *skb;
1462 struct netlink_callback *cb;
1463};
1464
1465static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1466{
1467 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1468
1469 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1470 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1471}
1472
30723673
DM
1473static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1474 struct tcmsg *tcm, struct netlink_callback *cb,
1475 int *t_p, int s_t)
1476{
1477 struct qdisc_dump_args arg;
1478
1479 if (tc_qdisc_dump_ignore(q) ||
1480 *t_p < s_t || !q->ops->cl_ops ||
1481 (tcm->tcm_parent &&
1482 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1483 (*t_p)++;
1484 return 0;
1485 }
1486 if (*t_p > s_t)
1487 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1488 arg.w.fn = qdisc_class_dump;
1489 arg.skb = skb;
1490 arg.cb = cb;
1491 arg.w.stop = 0;
1492 arg.w.skip = cb->args[1];
1493 arg.w.count = 0;
1494 q->ops->cl_ops->walk(q, &arg.w);
1495 cb->args[1] = arg.w.count;
1496 if (arg.w.stop)
1497 return -1;
1498 (*t_p)++;
1499 return 0;
1500}
1501
1502static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1503 struct tcmsg *tcm, struct netlink_callback *cb,
1504 int *t_p, int s_t)
1505{
1506 struct Qdisc *q;
1507
1508 if (!root)
1509 return 0;
1510
1511 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1512 return -1;
1513
1514 list_for_each_entry(q, &root->list, list) {
1515 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1516 return -1;
1517 }
1518
1519 return 0;
1520}
1521
1da177e4
LT
1522static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1523{
30723673 1524 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
3b1e0a65 1525 struct net *net = sock_net(skb->sk);
30723673 1526 struct netdev_queue *dev_queue;
1da177e4 1527 struct net_device *dev;
30723673 1528 int t, s_t;
1da177e4 1529
b854272b
DL
1530 if (net != &init_net)
1531 return 0;
1532
1da177e4
LT
1533 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1534 return 0;
881d966b 1535 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1da177e4
LT
1536 return 0;
1537
1538 s_t = cb->args[0];
1539 t = 0;
1540
30723673 1541 dev_queue = netdev_get_tx_queue(dev, 0);
8123b421 1542 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
30723673
DM
1543 goto done;
1544
1545 dev_queue = &dev->rx_queue;
8123b421 1546 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
30723673 1547 goto done;
1da177e4 1548
30723673 1549done:
1da177e4
LT
1550 cb->args[0] = t;
1551
1552 dev_put(dev);
1553 return skb->len;
1554}
1555
1556/* Main classifier routine: scans classifier chain attached
1557 to this qdisc, (optionally) tests for protocol and asks
1558 specific classifiers.
1559 */
73ca4918
PM
1560int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1561 struct tcf_result *res)
1562{
1563 __be16 protocol = skb->protocol;
1564 int err = 0;
1565
1566 for (; tp; tp = tp->next) {
1567 if ((tp->protocol == protocol ||
1568 tp->protocol == htons(ETH_P_ALL)) &&
1569 (err = tp->classify(skb, tp, res)) >= 0) {
1570#ifdef CONFIG_NET_CLS_ACT
1571 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1572 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1573#endif
1574 return err;
1575 }
1576 }
1577 return -1;
1578}
1579EXPORT_SYMBOL(tc_classify_compat);
1580
/*
 * Classify @skb against the chain starting at @tp.
 *
 * Without CONFIG_NET_CLS_ACT this is just tc_classify_compat().  With it,
 * a TC_ACT_RECLASSIFY verdict restarts classification from the head of
 * the chain; the restart count is kept in skb->tc_verd and once it
 * reaches MAX_REC_LOOP the packet is reported and TC_ACT_SHOT is
 * returned.
 *
 * Returns the verdict of tc_classify_compat(), or TC_ACT_SHOT on a
 * reclassify loop.
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);

		/* Every retry starts over from the head of the chain. */
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
1da177e4 1611
a48b5a61
PM
1612void tcf_destroy(struct tcf_proto *tp)
1613{
1614 tp->ops->destroy(tp);
1615 module_put(tp->ops->owner);
1616 kfree(tp);
1617}
1618
ff31ab56 1619void tcf_destroy_chain(struct tcf_proto **fl)
a48b5a61
PM
1620{
1621 struct tcf_proto *tp;
1622
ff31ab56
PM
1623 while ((tp = *fl) != NULL) {
1624 *fl = tp->next;
a48b5a61
PM
1625 tcf_destroy(tp);
1626 }
1627}
1628EXPORT_SYMBOL(tcf_destroy_chain);
1629
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * seq_file show callback for the "psched" proc entry.  Prints four
 * 8-digit hex words: NSEC_PER_USEC, PSCHED_US2NS(1), the constant 1000000
 * and NSEC_PER_SEC divided by the CLOCK_MONOTONIC hrtimer resolution in
 * nanoseconds.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec res;
	u32 res_ns;

	hrtimer_get_res(CLOCK_MONOTONIC, &res);
	res_ns = (u32)ktime_to_ns(timespec_to_ktime(res));

	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC,
		   (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / res_ns);

	return 0;
}

/* open() for the proc entry: single-shot seq_file backed by psched_show(). */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}

/* File operations for the "psched" proc entry. */
static const struct file_operations psched_fops = {
	.owner		= THIS_MODULE,
	.open		= psched_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
1657
1da177e4
LT
1658static int __init pktsched_init(void)
1659{
1da177e4
LT
1660 register_qdisc(&pfifo_qdisc_ops);
1661 register_qdisc(&bfifo_qdisc_ops);
457c4cbc 1662 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
1da177e4 1663
be577ddc
TG
1664 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1665 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1666 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1667 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1668 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1669 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1670
1da177e4
LT
1671 return 0;
1672}
1673
1674subsys_initcall(pktsched_init);