/*
 * net/sched/sch_api.c  Packet scheduler API.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
                        struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                         struct Qdisc *q, unsigned long cl, int event);

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing disciplines manager frontend.
   2. The traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that is
   able to enqueue packets and to dequeue them (when the device is
   ready to send something) in an order and at times determined by the
   algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on.

   The goal of the routines in this file is to translate the information
   supplied by the user in the form of handles into a form more
   intelligible to the kernel, to make some sanity checks and do the part
   of the work that is common to all qdiscs, and to provide rtnetlink
   notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not the
   real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a nonzero error code:
   NET_XMIT_DROP    - this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN      - this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED - dropped by police.
     Expected action: back off or report an error to real-time apps.

   Auxiliary routines:

   ---requeue

   requeues a once-dequeued packet. It is used for non-standard or
   just buggy devices, which can defer output even if netif_queue_stopped() == 0.

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers, counters (except for statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
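
/*
 * To make the enqueue/dequeue contract above concrete, here is a minimal
 * fifo-style sketch (hypothetical; not a qdisc in this tree). It assumes
 * the qdisc_enqueue_tail(), qdisc_dequeue_head() and qdisc_drop() helpers
 * from sch_generic.h of this kernel generation:
 *
 *      static int sketch_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 *      {
 *              if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
 *                      return qdisc_enqueue_tail(skb, sch); // NET_XMIT_SUCCESS
 *              return qdisc_drop(skb, sch);                 // NET_XMIT_DROP
 *      }
 *
 *      static struct sk_buff *sketch_dequeue(struct Qdisc *sch)
 *      {
 *              return qdisc_dequeue_head(sch);  // NULL: nothing to send now
 *      }
 */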

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *      Queueing disciplines manipulation.      *
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
        struct Qdisc_ops *q, **qp;
        int rc = -EEXIST;

        write_lock(&qdisc_mod_lock);
        for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
                if (!strcmp(qops->id, q->id))
                        goto out;

        if (qops->enqueue == NULL)
                qops->enqueue = noop_qdisc_ops.enqueue;
        if (qops->requeue == NULL)
                qops->requeue = noop_qdisc_ops.requeue;
        if (qops->dequeue == NULL)
                qops->dequeue = noop_qdisc_ops.dequeue;

        qops->next = NULL;
        *qp = qops;
        rc = 0;
out:
        write_unlock(&qdisc_mod_lock);
        return rc;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
        struct Qdisc_ops *q, **qp;
        int err = -ENOENT;

        write_lock(&qdisc_mod_lock);
        for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
                if (q == qops)
                        break;
        if (q) {
                *qp = q->next;
                q->next = NULL;
                err = 0;
        }
        write_unlock(&qdisc_mod_lock);
        return err;
}
EXPORT_SYMBOL(unregister_qdisc);
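
/*
 * A typical caller of this pair is a scheduler module's init/exit hooks.
 * A minimal sketch (names hypothetical, pattern as in the sch_*.c modules),
 * reusing the enqueue/dequeue sketches above:
 *
 *      static struct Qdisc_ops sketch_qdisc_ops __read_mostly = {
 *              .id             = "sketch",
 *              .enqueue        = sketch_enqueue,
 *              .dequeue        = sketch_dequeue,
 *              .owner          = THIS_MODULE,
 *      };
 *
 *      static int __init sketch_module_init(void)
 *      {
 *              return register_qdisc(&sketch_qdisc_ops);
 *      }
 *
 *      static void __exit sketch_module_exit(void)
 *      {
 *              unregister_qdisc(&sketch_qdisc_ops);
 *      }
 *      module_init(sketch_module_init);
 *      module_exit(sketch_module_exit);
 */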

/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (the root qdisc, all its children, children of children, etc.)
 */

struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
        struct Qdisc *q;

        if (!(root->flags & TCQ_F_BUILTIN) &&
            root->handle == handle)
                return root;

        list_for_each_entry(q, &root->list, list) {
                if (q->handle == handle)
                        return q;
        }
        return NULL;
}

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
        unsigned int i;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
                struct Qdisc *q, *txq_root = txq->qdisc_sleeping;

                q = qdisc_match_from_root(txq_root, handle);
                if (q)
                        return q;
        }
        return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
        unsigned long cl;
        struct Qdisc *leaf;
        const struct Qdisc_class_ops *cops = p->ops->cl_ops;

        if (cops == NULL)
                return NULL;
        cl = cops->get(p, classid);

        if (cl == 0)
                return NULL;
        leaf = cops->leaf(p, cl);
        cops->put(p, cl);
        return leaf;
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
        struct Qdisc_ops *q = NULL;

        if (kind) {
                read_lock(&qdisc_mod_lock);
                for (q = qdisc_base; q; q = q->next) {
                        if (nla_strcmp(kind, q->id) == 0) {
                                if (!try_module_get(q->owner))
                                        q = NULL;
                                break;
                        }
                }
                read_unlock(&qdisc_mod_lock);
        }
        return q;
}

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
        struct qdisc_rate_table *rtab;

        for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
                if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
                        rtab->refcnt++;
                        return rtab;
                }
        }

        if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
            nla_len(tab) != TC_RTAB_SIZE)
                return NULL;

        rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
        if (rtab) {
                rtab->rate = *r;
                rtab->refcnt = 1;
                memcpy(rtab->data, nla_data(tab), 1024);
                rtab->next = qdisc_rtab_list;
                qdisc_rtab_list = rtab;
        }
        return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
        struct qdisc_rate_table *rtab, **rtabp;

        if (!tab || --tab->refcnt)
                return;

        for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
                if (rtab == tab) {
                        *rtabp = rtab->next;
                        kfree(rtab);
                        return;
                }
        }
}
EXPORT_SYMBOL(qdisc_put_rtab);
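
/*
 * Rate tables are reference-counted and shared between qdiscs. A qdisc's
 * ->init()/->change() typically turns a ratespec plus table attribute into
 * a table and drops the reference again on destroy; a hedged sketch of the
 * usual pairing (sch_tbf, for instance, carries the table in TCA_TBF_RTAB):
 *
 *      struct qdisc_rate_table *rtab;
 *
 *      rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
 *      if (rtab == NULL)
 *              return -EINVAL;
 *      ...
 *      qdisc_put_rtab(rtab);   // in ->destroy(), releases the reference
 */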

static LIST_HEAD(qdisc_stab_list);
static DEFINE_SPINLOCK(qdisc_stab_lock);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
        [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
        [TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
        struct nlattr *tb[TCA_STAB_MAX + 1];
        struct qdisc_size_table *stab;
        struct tc_sizespec *s;
        unsigned int tsize = 0;
        u16 *tab = NULL;
        int err;

        err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
        if (err < 0)
                return ERR_PTR(err);
        if (!tb[TCA_STAB_BASE])
                return ERR_PTR(-EINVAL);

        s = nla_data(tb[TCA_STAB_BASE]);

        if (s->tsize > 0) {
                if (!tb[TCA_STAB_DATA])
                        return ERR_PTR(-EINVAL);
                tab = nla_data(tb[TCA_STAB_DATA]);
                tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
        }

        if (!s || tsize != s->tsize || (!tab && tsize > 0))
                return ERR_PTR(-EINVAL);

        spin_lock(&qdisc_stab_lock);

        list_for_each_entry(stab, &qdisc_stab_list, list) {
                if (memcmp(&stab->szopts, s, sizeof(*s)))
                        continue;
                if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
                        continue;
                stab->refcnt++;
                spin_unlock(&qdisc_stab_lock);
                return stab;
        }

        spin_unlock(&qdisc_stab_lock);

        stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
        if (!stab)
                return ERR_PTR(-ENOMEM);

        stab->refcnt = 1;
        stab->szopts = *s;
        if (tsize > 0)
                memcpy(stab->data, tab, tsize * sizeof(u16));

        spin_lock(&qdisc_stab_lock);
        list_add_tail(&stab->list, &qdisc_stab_list);
        spin_unlock(&qdisc_stab_lock);

        return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
        if (!tab)
                return;

        spin_lock(&qdisc_stab_lock);

        if (--tab->refcnt == 0) {
                list_del(&tab->list);
                kfree(tab);
        }

        spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
        struct nlattr *nest;

        nest = nla_nest_start(skb, TCA_STAB);
        NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
        nla_nest_end(skb, nest);

        return skb->len;

nla_put_failure:
        return -1;
}

void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
{
        int pkt_len, slot;

        pkt_len = skb->len + stab->szopts.overhead;
        if (unlikely(!stab->szopts.tsize))
                goto out;

        slot = pkt_len + stab->szopts.cell_align;
        if (unlikely(slot < 0))
                slot = 0;

        slot >>= stab->szopts.cell_log;
        if (likely(slot < stab->szopts.tsize))
                pkt_len = stab->data[slot];
        else
                pkt_len = stab->data[stab->szopts.tsize - 1] *
                                (slot / stab->szopts.tsize) +
                                stab->data[slot % stab->szopts.tsize];

        pkt_len <<= stab->szopts.size_log;
out:
        if (unlikely(pkt_len < 1))
                pkt_len = 1;
        qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(qdisc_calculate_pkt_len);
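
/*
 * A worked example of the lookup above, with hypothetical table parameters:
 * say szopts.overhead = 24, cell_align = 0, cell_log = 6 (64-byte cells),
 * size_log = 0 and tsize = 512. Then for skb->len = 1000:
 *
 *      pkt_len = 1000 + 24 = 1024
 *      slot    = 1024 >> 6 = 16
 *      pkt_len = stab->data[16]   (e.g. the link-layer padded size)
 *
 * Slots past the end of the table (slot >= tsize) are extrapolated from
 * the last entry, so a short table still covers arbitrarily large packets.
 */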

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
        struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
                                                 timer);

        wd->qdisc->flags &= ~TCQ_F_THROTTLED;
        smp_wmb();
        __netif_schedule(wd->qdisc);

        return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
        hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        wd->timer.function = qdisc_watchdog;
        wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
{
        ktime_t time;

        wd->qdisc->flags |= TCQ_F_THROTTLED;
        time = ktime_set(0, 0);
        time = ktime_add_ns(time, PSCHED_US2NS(expires));
        hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
        hrtimer_cancel(&wd->timer);
        wd->qdisc->flags &= ~TCQ_F_THROTTLED;
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
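
/*
 * The watchdog is how rate-limiting qdiscs wake themselves up: ->dequeue()
 * returns NULL, schedules the timer for the moment the next packet may go
 * out, and the timer handler clears TCQ_F_THROTTLED and reschedules the
 * device. A sketch of the usual pattern (as in sch_tbf; "q" stands for the
 * qdisc's private data, which embeds a struct qdisc_watchdog):
 *
 *      ->init():       qdisc_watchdog_init(&q->watchdog, sch);
 *      ->dequeue():    if (now < next_xmit_time) {
 *                              qdisc_watchdog_schedule(&q->watchdog,
 *                                                      next_xmit_time);
 *                              return NULL;
 *                      }
 *      ->reset():      qdisc_watchdog_cancel(&q->watchdog);
 */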

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
        unsigned int size = n * sizeof(struct hlist_head), i;
        struct hlist_head *h;

        if (size <= PAGE_SIZE)
                h = kmalloc(size, GFP_KERNEL);
        else
                h = (struct hlist_head *)
                        __get_free_pages(GFP_KERNEL, get_order(size));

        if (h != NULL) {
                for (i = 0; i < n; i++)
                        INIT_HLIST_HEAD(&h[i]);
        }
        return h;
}

static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
{
        unsigned int size = n * sizeof(struct hlist_head);

        if (size <= PAGE_SIZE)
                kfree(h);
        else
                free_pages((unsigned long)h, get_order(size));
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
        struct Qdisc_class_common *cl;
        struct hlist_node *n, *next;
        struct hlist_head *nhash, *ohash;
        unsigned int nsize, nmask, osize;
        unsigned int i, h;

        /* Rehash when load factor exceeds 0.75 */
        if (clhash->hashelems * 4 <= clhash->hashsize * 3)
                return;
        nsize = clhash->hashsize * 2;
        nmask = nsize - 1;
        nhash = qdisc_class_hash_alloc(nsize);
        if (nhash == NULL)
                return;

        ohash = clhash->hash;
        osize = clhash->hashsize;

        sch_tree_lock(sch);
        for (i = 0; i < osize; i++) {
                hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
                        h = qdisc_class_hash(cl->classid, nmask);
                        hlist_add_head(&cl->hnode, &nhash[h]);
                }
        }
        clhash->hash = nhash;
        clhash->hashsize = nsize;
        clhash->hashmask = nmask;
        sch_tree_unlock(sch);

        qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
        unsigned int size = 4;

        clhash->hash = qdisc_class_hash_alloc(size);
        if (clhash->hash == NULL)
                return -ENOMEM;
        clhash->hashsize = size;
        clhash->hashmask = size - 1;
        clhash->hashelems = 0;
        return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
        qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
                             struct Qdisc_class_common *cl)
{
        unsigned int h;

        INIT_HLIST_NODE(&cl->hnode);
        h = qdisc_class_hash(cl->classid, clhash->hashmask);
        hlist_add_head(&cl->hnode, &clhash->hash[h]);
        clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
                             struct Qdisc_class_common *cl)
{
        hlist_del(&cl->hnode);
        clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
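
/*
 * Classful qdiscs embed a struct Qdisc_class_common at the start of their
 * per-class structure and use this hash to map classids to classes. A
 * sketch of the typical life cycle (pattern as in sch_htb; names here are
 * hypothetical):
 *
 *      struct sketch_class {
 *              struct Qdisc_class_common common;
 *              ...
 *      };
 *
 *      ->init():       qdisc_class_hash_init(&q->clhash);
 *      create:         cl->common.classid = classid;
 *                      qdisc_class_hash_insert(&q->clhash, &cl->common);
 *                      qdisc_class_hash_grow(sch, &q->clhash);
 *      lookup:         qdisc_class_find(&q->clhash, classid);
 *      delete:         qdisc_class_hash_remove(&q->clhash, &cl->common);
 *      ->destroy():    qdisc_class_hash_destroy(&q->clhash);
 */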

/* Allocate a unique handle from the space managed by the kernel */

static u32 qdisc_alloc_handle(struct net_device *dev)
{
        int i = 0x10000;
        static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

        do {
                autohandle += TC_H_MAKE(0x10000U, 0);
                if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
                        autohandle = TC_H_MAKE(0x80000000U, 0);
        } while (qdisc_lookup(dev, autohandle) && --i > 0);

        return i > 0 ? autohandle : 0;
}
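
/*
 * Handles are 32-bit "major:minor" pairs: TC_H_MAJ() is the upper 16 bits
 * (the qdisc), TC_H_MIN() the lower 16 (the class). The loop above thus
 * walks the kernel-reserved majors 8000:, 8001:, and so on (adding
 * TC_H_MAKE(0x10000U, 0) bumps the major by one) until it finds a handle
 * not yet in use on this device, giving up after 0x10000 attempts.
 */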

/* Attach toplevel qdisc to device queue. */

static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
                                     struct Qdisc *qdisc)
{
        struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
        spinlock_t *root_lock;

        root_lock = qdisc_root_lock(oqdisc);
        spin_lock_bh(root_lock);

        /* Prune old scheduler */
        if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
                qdisc_reset(oqdisc);

        /* ... and graft new one */
        if (qdisc == NULL)
                qdisc = &noop_qdisc;
        dev_queue->qdisc_sleeping = qdisc;
        dev_queue->qdisc = &noop_qdisc;

        spin_unlock_bh(root_lock);

        return oqdisc;
}

void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
        const struct Qdisc_class_ops *cops;
        unsigned long cl;
        u32 parentid;

        if (n == 0)
                return;
        while ((parentid = sch->parent)) {
                if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
                        return;

                sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
                if (sch == NULL) {
                        WARN_ON(parentid != TC_H_ROOT);
                        return;
                }
                cops = sch->ops->cl_ops;
                if (cops->qlen_notify) {
                        cl = cops->get(sch, parentid);
                        cops->qlen_notify(sch, cl);
                        cops->put(sch, cl);
                }
                sch->q.qlen -= n;
        }
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);

static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
                               struct Qdisc *old, struct Qdisc *new)
{
        if (new || old)
                qdisc_notify(skb, n, clid, old, new);

        if (old) {
                spin_lock_bh(&old->q.lock);
                qdisc_destroy(old);
                spin_unlock_bh(&old->q.lock);
        }
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
                       struct Qdisc *new, struct Qdisc *old)
{
        struct Qdisc *q = old;
        int err = 0;

        if (parent == NULL) {
                unsigned int i, num_q, ingress;

                ingress = 0;
                num_q = dev->num_tx_queues;
                if ((q && q->flags & TCQ_F_INGRESS) ||
                    (new && new->flags & TCQ_F_INGRESS)) {
                        num_q = 1;
                        ingress = 1;
                }

                if (dev->flags & IFF_UP)
                        dev_deactivate(dev);

                for (i = 0; i < num_q; i++) {
                        struct netdev_queue *dev_queue = &dev->rx_queue;

                        if (!ingress)
                                dev_queue = netdev_get_tx_queue(dev, i);

                        old = dev_graft_qdisc(dev_queue, new);
                        if (new && i > 0)
                                atomic_inc(&new->refcnt);

                        notify_and_destroy(skb, n, classid, old, new);
                }

                if (dev->flags & IFF_UP)
                        dev_activate(dev);
        } else {
                const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

                err = -EINVAL;

                if (cops) {
                        unsigned long cl = cops->get(parent, classid);
                        if (cl) {
                                err = cops->graft(parent, cl, new, &old);
                                cops->put(parent, cl);
                        }
                }
                if (!err)
                        notify_and_destroy(skb, n, classid, old, new);
        }
        return err;
}
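
/*
 * For example, "tc qdisc add dev eth0 root handle 1: htb" reaches this
 * function with parent == NULL and classid == TC_H_ROOT, so the new qdisc
 * is grafted onto every transmit queue of the device (with the refcount
 * bumped once per additional queue), while "tc qdisc add dev eth0
 * parent 1:1 pfifo" takes the classful branch and asks the parent's
 * ->graft() operation to splice the child under class 1:1.
 */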

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via the tca attribute array.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
             u32 parent, u32 handle, struct nlattr **tca, int *errp)
{
        int err;
        struct nlattr *kind = tca[TCA_KIND];
        struct Qdisc *sch;
        struct Qdisc_ops *ops;
        struct qdisc_size_table *stab;

        ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
        if (ops == NULL && kind != NULL) {
                char name[IFNAMSIZ];
                if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
                        /* We dropped the RTNL semaphore in order to
                         * perform the module load. So, even if we
                         * succeeded in loading the module we have to
                         * tell the caller to replay the request. We
                         * indicate this using -EAGAIN.
                         * We replay the request because the device may
                         * go away in the meantime.
                         */
                        rtnl_unlock();
                        request_module("sch_%s", name);
                        rtnl_lock();
                        ops = qdisc_lookup_ops(kind);
                        if (ops != NULL) {
                                /* We will try qdisc_lookup_ops again,
                                 * so don't keep a reference.
                                 */
                                module_put(ops->owner);
                                err = -EAGAIN;
                                goto err_out;
                        }
                }
        }
#endif

        err = -ENOENT;
        if (ops == NULL)
                goto err_out;

        sch = qdisc_alloc(dev_queue, ops);
        if (IS_ERR(sch)) {
                err = PTR_ERR(sch);
                goto err_out2;
        }

        sch->parent = parent;

        if (handle == TC_H_INGRESS) {
                sch->flags |= TCQ_F_INGRESS;
                handle = TC_H_MAKE(TC_H_INGRESS, 0);
        } else {
                if (handle == 0) {
                        handle = qdisc_alloc_handle(dev);
                        err = -ENOMEM;
                        if (handle == 0)
                                goto err_out3;
                }
        }

        sch->handle = handle;

        if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
                if (tca[TCA_STAB]) {
                        stab = qdisc_get_stab(tca[TCA_STAB]);
                        if (IS_ERR(stab)) {
                                err = PTR_ERR(stab);
                                goto err_out3;
                        }
                        sch->stab = stab;
                }
                if (tca[TCA_RATE]) {
                        err = gen_new_estimator(&sch->bstats, &sch->rate_est,
                                                qdisc_root_lock(sch),
                                                tca[TCA_RATE]);
                        if (err) {
                                /*
                                 * Any broken qdiscs that would require
                                 * an ops->reset() here? The qdisc was never
                                 * in action so it shouldn't be necessary.
                                 */
                                if (ops->destroy)
                                        ops->destroy(sch);
                                goto err_out3;
                        }
                }
                if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS))
                        list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list);

                return sch;
        }
err_out3:
        qdisc_put_stab(sch->stab);
        dev_put(dev);
        kfree((char *) sch - sch->padded);
err_out2:
        module_put(ops->owner);
err_out:
        *errp = err;
        return NULL;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
        struct qdisc_size_table *stab = NULL;
        int err = 0;

        if (tca[TCA_OPTIONS]) {
                if (sch->ops->change == NULL)
                        return -EINVAL;
                err = sch->ops->change(sch, tca[TCA_OPTIONS]);
                if (err)
                        return err;
        }

        if (tca[TCA_STAB]) {
                stab = qdisc_get_stab(tca[TCA_STAB]);
                if (IS_ERR(stab))
                        return PTR_ERR(stab);
        }

        qdisc_put_stab(sch->stab);
        sch->stab = stab;

        if (tca[TCA_RATE])
                gen_replace_estimator(&sch->bstats, &sch->rate_est,
                                      qdisc_root_lock(sch), tca[TCA_RATE]);
        return 0;
}

struct check_loop_arg
{
        struct qdisc_walker     w;
        struct Qdisc            *p;
        int                     depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
        struct check_loop_arg arg;

        if (q->ops->cl_ops == NULL)
                return 0;

        arg.w.stop = arg.w.skip = arg.w.count = 0;
        arg.w.fn = check_loop_fn;
        arg.depth = depth;
        arg.p = p;
        q->ops->cl_ops->walk(q, &arg.w);
        return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
        struct Qdisc *leaf;
        const struct Qdisc_class_ops *cops = q->ops->cl_ops;
        struct check_loop_arg *arg = (struct check_loop_arg *)w;

        leaf = cops->leaf(q, cl);
        if (leaf) {
                if (leaf == arg->p || arg->depth > 7)
                        return -ELOOP;
                return check_loop(leaf, arg->p, arg->depth + 1);
        }
        return 0;
}

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct tcmsg *tcm = NLMSG_DATA(n);
        struct nlattr *tca[TCA_MAX + 1];
        struct net_device *dev;
        u32 clid = tcm->tcm_parent;
        struct Qdisc *q = NULL;
        struct Qdisc *p = NULL;
        int err;

        if (net != &init_net)
                return -EINVAL;

        if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
                return -ENODEV;

        err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
        if (err < 0)
                return err;

        if (clid) {
                if (clid != TC_H_ROOT) {
                        if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
                                if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
                                        return -ENOENT;
                                q = qdisc_leaf(p, clid);
                        } else { /* ingress */
                                q = dev->rx_queue.qdisc_sleeping;
                        }
                } else {
                        struct netdev_queue *dev_queue;
                        dev_queue = netdev_get_tx_queue(dev, 0);
                        q = dev_queue->qdisc_sleeping;
                }
                if (!q)
                        return -ENOENT;

                if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
                        return -EINVAL;
        } else {
                if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
                        return -ENOENT;
        }

        if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
                return -EINVAL;

        if (n->nlmsg_type == RTM_DELQDISC) {
                if (!clid)
                        return -EINVAL;
                if (q->handle == 0)
                        return -ENOENT;
                if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
                        return err;
        } else {
                qdisc_notify(skb, n, clid, NULL, q);
        }
        return 0;
}

/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct tcmsg *tcm;
        struct nlattr *tca[TCA_MAX + 1];
        struct net_device *dev;
        u32 clid;
        struct Qdisc *q, *p;
        int err;

        if (net != &init_net)
                return -EINVAL;

replay:
        /* Reinit, just in case something touches this. */
        tcm = NLMSG_DATA(n);
        clid = tcm->tcm_parent;
        q = p = NULL;

        if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
                return -ENODEV;

        err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
        if (err < 0)
                return err;

        if (clid) {
                if (clid != TC_H_ROOT) {
                        if (clid != TC_H_INGRESS) {
                                if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
                                        return -ENOENT;
                                q = qdisc_leaf(p, clid);
                        } else { /* ingress */
                                q = dev->rx_queue.qdisc_sleeping;
                        }
                } else {
                        struct netdev_queue *dev_queue;
                        dev_queue = netdev_get_tx_queue(dev, 0);
                        q = dev_queue->qdisc_sleeping;
                }

                /* It may be the default qdisc; ignore it. */
                if (q && q->handle == 0)
                        q = NULL;

                if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
                        if (tcm->tcm_handle) {
                                if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
                                        return -EEXIST;
                                if (TC_H_MIN(tcm->tcm_handle))
                                        return -EINVAL;
                                if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
                                        goto create_n_graft;
                                if (n->nlmsg_flags&NLM_F_EXCL)
                                        return -EEXIST;
                                if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
                                        return -EINVAL;
                                if (q == p ||
                                    (p && check_loop(q, p, 0)))
                                        return -ELOOP;
                                atomic_inc(&q->refcnt);
                                goto graft;
                        } else {
                                if (q == NULL)
                                        goto create_n_graft;

                                /* This magic test requires explanation.
                                 *
                                 * We know that some child q is already
                                 * attached to this parent and have a choice:
                                 * either to change it or to create/graft a
                                 * new one.
                                 *
                                 * 1. We are allowed to create/graft only
                                 * if both CREATE and REPLACE flags are set.
                                 *
                                 * 2. If EXCL is set, the requester wanted to
                                 * say that the qdisc tcm_handle is not
                                 * expected to exist, so we choose
                                 * create/graft too.
                                 *
                                 * 3. The last case is when no flags are set.
                                 * Alas, it is sort of a hole in the API; we
                                 * cannot decide what to do unambiguously.
                                 * For now we select create/graft if the
                                 * user gave a KIND which does not match the
                                 * existing one.
                                 */
                                if ((n->nlmsg_flags&NLM_F_CREATE) &&
                                    (n->nlmsg_flags&NLM_F_REPLACE) &&
                                    ((n->nlmsg_flags&NLM_F_EXCL) ||
                                     (tca[TCA_KIND] &&
                                      nla_strcmp(tca[TCA_KIND], q->ops->id))))
                                        goto create_n_graft;
                        }
                }
        } else {
                if (!tcm->tcm_handle)
                        return -EINVAL;
                q = qdisc_lookup(dev, tcm->tcm_handle);
        }

        /* Change qdisc parameters */
        if (q == NULL)
                return -ENOENT;
        if (n->nlmsg_flags&NLM_F_EXCL)
                return -EEXIST;
        if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
                return -EINVAL;
        err = qdisc_change(q, tca);
        if (err == 0)
                qdisc_notify(skb, n, clid, NULL, q);
        return err;

create_n_graft:
        if (!(n->nlmsg_flags&NLM_F_CREATE))
                return -ENOENT;
        if (clid == TC_H_INGRESS)
                q = qdisc_create(dev, &dev->rx_queue,
                                 tcm->tcm_parent, tcm->tcm_parent,
                                 tca, &err);
        else
                q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
                                 tcm->tcm_parent, tcm->tcm_handle,
                                 tca, &err);
        if (q == NULL) {
                if (err == -EAGAIN)
                        goto replay;
                return err;
        }

graft:
        if (1) {
                spinlock_t *root_lock;

                err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
                if (err) {
                        if (q) {
                                root_lock = qdisc_root_lock(q);
                                spin_lock_bh(root_lock);
                                qdisc_destroy(q);
                                spin_unlock_bh(root_lock);
                        }
                        return err;
                }
        }
        return 0;
}
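
/*
 * The flag combinations tested above map onto the familiar tc commands
 * roughly as follows (as issued by iproute2):
 *
 *      tc qdisc add            NLM_F_CREATE | NLM_F_EXCL       fail if it exists
 *      tc qdisc change         (no flags)                      fail unless it exists
 *      tc qdisc replace        NLM_F_CREATE | NLM_F_REPLACE    create or swap
 */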

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
                         u32 pid, u32 seq, u16 flags, int event)
{
        struct tcmsg *tcm;
        struct nlmsghdr *nlh;
        unsigned char *b = skb_tail_pointer(skb);
        struct gnet_dump d;

        nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
        tcm = NLMSG_DATA(nlh);
        tcm->tcm_family = AF_UNSPEC;
        tcm->tcm__pad1 = 0;
        tcm->tcm__pad2 = 0;
        tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
        tcm->tcm_parent = clid;
        tcm->tcm_handle = q->handle;
        tcm->tcm_info = atomic_read(&q->refcnt);
        NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
        if (q->ops->dump && q->ops->dump(q, skb) < 0)
                goto nla_put_failure;
        q->qstats.qlen = q->q.qlen;

        if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
                goto nla_put_failure;

        if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
                                         TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
                goto nla_put_failure;

        if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
                goto nla_put_failure;

        if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
            gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
            gnet_stats_copy_queue(&d, &q->qstats) < 0)
                goto nla_put_failure;

        if (gnet_stats_finish_copy(&d) < 0)
                goto nla_put_failure;

        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
        return skb->len;

nlmsg_failure:
nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                        u32 clid, struct Qdisc *old, struct Qdisc *new)
{
        struct sk_buff *skb;
        u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb)
                return -ENOBUFS;

        if (old && old->handle) {
                if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
                        goto err_out;
        }
        if (new) {
                if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
                        goto err_out;
        }

        if (skb->len)
                return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

err_out:
        kfree_skb(skb);
        return -EINVAL;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q)
{
        return (q->flags & TCQ_F_BUILTIN) ? true : false;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
                              struct netlink_callback *cb,
                              int *q_idx_p, int s_q_idx)
{
        int ret = 0, q_idx = *q_idx_p;
        struct Qdisc *q;

        if (!root)
                return 0;

        q = root;
        if (q_idx < s_q_idx) {
                q_idx++;
        } else {
                if (!tc_qdisc_dump_ignore(q) &&
                    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
                                  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
                        goto done;
                q_idx++;
        }
        list_for_each_entry(q, &root->list, list) {
                if (q_idx < s_q_idx) {
                        q_idx++;
                        continue;
                }
                if (!tc_qdisc_dump_ignore(q) &&
                    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
                                  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
                        goto done;
                q_idx++;
        }

out:
        *q_idx_p = q_idx;
        return ret;
done:
        ret = -1;
        goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int idx, q_idx;
        int s_idx, s_q_idx;
        struct net_device *dev;

        if (net != &init_net)
                return 0;

        s_idx = cb->args[0];
        s_q_idx = q_idx = cb->args[1];
        read_lock(&dev_base_lock);
        idx = 0;
        for_each_netdev(&init_net, dev) {
                struct netdev_queue *dev_queue;

                if (idx < s_idx)
                        goto cont;
                if (idx > s_idx)
                        s_q_idx = 0;
                q_idx = 0;

                dev_queue = netdev_get_tx_queue(dev, 0);
                if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
                        goto done;

                dev_queue = &dev->rx_queue;
                if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
                        goto done;

cont:
                idx++;
        }

done:
        read_unlock(&dev_base_lock);

        cb->args[0] = idx;
        cb->args[1] = q_idx;

        return skb->len;
}



/************************************************
 *      Traffic classes manipulation.           *
 ************************************************/


static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct netdev_queue *dev_queue;
        struct tcmsg *tcm = NLMSG_DATA(n);
        struct nlattr *tca[TCA_MAX + 1];
        struct net_device *dev;
        struct Qdisc *q = NULL;
        const struct Qdisc_class_ops *cops;
        unsigned long cl = 0;
        unsigned long new_cl;
        u32 pid = tcm->tcm_parent;
        u32 clid = tcm->tcm_handle;
        u32 qid = TC_H_MAJ(clid);
        int err;

        if (net != &init_net)
                return -EINVAL;

        if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
                return -ENODEV;

        err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
        if (err < 0)
                return err;

        /*
           parent == TC_H_UNSPEC - unspecified parent.
           parent == TC_H_ROOT   - class is root, which has no parent.
           parent == X:0         - parent is root class.
           parent == X:Y         - parent is a node in hierarchy.
           parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

           handle == 0:0         - generate handle from kernel pool.
           handle == 0:Y         - class is X:Y, where X:0 is qdisc.
           handle == X:Y         - fully specified.
           handle == X:0         - root class.
         */
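
        /*
         * A worked example of the resolution below: a request with
         * parent 1:1 and handle 1:10 gives pid == 1:1 and clid == 1:10,
         * so qid becomes 1:0 and the two majors must agree. With handle
         * 0:10 the major is completed from the parent, again yielding
         * class 1:10. With no handle at all and parent == TC_H_ROOT,
         * the root qdisc's own class (1:0) is addressed.
         */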

        /* Step 1. Determine qdisc handle X:0 */

        dev_queue = netdev_get_tx_queue(dev, 0);
        if (pid != TC_H_ROOT) {
                u32 qid1 = TC_H_MAJ(pid);

                if (qid && qid1) {
                        /* If both majors are known, they must be identical. */
                        if (qid != qid1)
                                return -EINVAL;
                } else if (qid1) {
                        qid = qid1;
                } else if (qid == 0)
                        qid = dev_queue->qdisc_sleeping->handle;

                /* Now qid is the genuine qdisc handle, consistent with
                   both parent and child.

                   TC_H_MAJ(pid) may still be unspecified; complete it now.
                 */
                if (pid)
                        pid = TC_H_MAKE(qid, pid);
        } else {
                if (qid == 0)
                        qid = dev_queue->qdisc_sleeping->handle;
        }

        /* OK. Locate qdisc */
        if ((q = qdisc_lookup(dev, qid)) == NULL)
                return -ENOENT;

        /* And check that it supports classes */
        cops = q->ops->cl_ops;
        if (cops == NULL)
                return -EINVAL;

        /* Now try to get class */
        if (clid == 0) {
                if (pid == TC_H_ROOT)
                        clid = qid;
        } else
                clid = TC_H_MAKE(qid, clid);

        if (clid)
                cl = cops->get(q, clid);

        if (cl == 0) {
                err = -ENOENT;
                if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
                        goto out;
        } else {
                switch (n->nlmsg_type) {
                case RTM_NEWTCLASS:
                        err = -EEXIST;
                        if (n->nlmsg_flags&NLM_F_EXCL)
                                goto out;
                        break;
                case RTM_DELTCLASS:
                        err = cops->delete(q, cl);
                        if (err == 0)
                                tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
                        goto out;
                case RTM_GETTCLASS:
                        err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
                        goto out;
                default:
                        err = -EINVAL;
                        goto out;
                }
        }

        new_cl = cl;
        err = cops->change(q, clid, pid, tca, &new_cl);
        if (err == 0)
                tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
        if (cl)
                cops->put(q, cl);

        return err;
}


static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
                          unsigned long cl,
                          u32 pid, u32 seq, u16 flags, int event)
{
        struct tcmsg *tcm;
        struct nlmsghdr *nlh;
        unsigned char *b = skb_tail_pointer(skb);
        struct gnet_dump d;
        const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

        nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
        tcm = NLMSG_DATA(nlh);
        tcm->tcm_family = AF_UNSPEC;
        tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
        tcm->tcm_parent = q->handle;
        tcm->tcm_handle = q->handle;
        tcm->tcm_info = 0;
        NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
        if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
                goto nla_put_failure;

        if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
                                         TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
                goto nla_put_failure;

        if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
                goto nla_put_failure;

        if (gnet_stats_finish_copy(&d) < 0)
                goto nla_put_failure;

        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
        return skb->len;

nlmsg_failure:
nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                         struct Qdisc *q, unsigned long cl, int event)
{
        struct sk_buff *skb;
        u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb)
                return -ENOBUFS;

        if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
                kfree_skb(skb);
                return -EINVAL;
        }

        return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}

struct qdisc_dump_args
{
        struct qdisc_walker     w;
        struct sk_buff          *skb;
        struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
        struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

        return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
                              a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
                                struct tcmsg *tcm, struct netlink_callback *cb,
                                int *t_p, int s_t)
{
        struct qdisc_dump_args arg;

        if (tc_qdisc_dump_ignore(q) ||
            *t_p < s_t || !q->ops->cl_ops ||
            (tcm->tcm_parent &&
             TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
                (*t_p)++;
                return 0;
        }
        if (*t_p > s_t)
                memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
        arg.w.fn = qdisc_class_dump;
        arg.skb = skb;
        arg.cb = cb;
        arg.w.stop = 0;
        arg.w.skip = cb->args[1];
        arg.w.count = 0;
        q->ops->cl_ops->walk(q, &arg.w);
        cb->args[1] = arg.w.count;
        if (arg.w.stop)
                return -1;
        (*t_p)++;
        return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
                               struct tcmsg *tcm, struct netlink_callback *cb,
                               int *t_p, int s_t)
{
        struct Qdisc *q;

        if (!root)
                return 0;

        if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
                return -1;

        list_for_each_entry(q, &root->list, list) {
                if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
                        return -1;
        }

        return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
        struct net *net = sock_net(skb->sk);
        struct netdev_queue *dev_queue;
        struct net_device *dev;
        int t, s_t;

        if (net != &init_net)
                return 0;

        if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
                return 0;
        if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
                return 0;

        s_t = cb->args[0];
        t = 0;

        dev_queue = netdev_get_tx_queue(dev, 0);
        if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
                goto done;

        dev_queue = &dev->rx_queue;
        if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
                goto done;

done:
        cb->args[0] = t;

        dev_put(dev);
        return skb->len;
}

/* Main classifier routine: scans the classifier chain attached
   to this qdisc, (optionally) tests for the protocol, and asks
   the specific classifiers.
 */
int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
                       struct tcf_result *res)
{
        __be16 protocol = skb->protocol;
        int err = 0;

        for (; tp; tp = tp->next) {
                if ((tp->protocol == protocol ||
                     tp->protocol == htons(ETH_P_ALL)) &&
                    (err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
                        if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
                                skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
#endif
                        return err;
                }
        }
        return -1;
}
EXPORT_SYMBOL(tc_classify_compat);

int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
                struct tcf_result *res)
{
        int err = 0;
        __be16 protocol;
#ifdef CONFIG_NET_CLS_ACT
        struct tcf_proto *otp = tp;
reclassify:
#endif
        protocol = skb->protocol;

        err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
        if (err == TC_ACT_RECLASSIFY) {
                u32 verd = G_TC_VERD(skb->tc_verd);
                tp = otp;

                if (verd++ >= MAX_REC_LOOP) {
                        printk("rule prio %u protocol %02x reclassify loop, "
                               "packet dropped\n",
                               tp->prio & 0xffff, ntohs(tp->protocol));
                        return TC_ACT_SHOT;
                }
                skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
                goto reclassify;
        }
#endif
        return err;
}
EXPORT_SYMBOL(tc_classify);

void tcf_destroy(struct tcf_proto *tp)
{
        tp->ops->destroy(tp);
        module_put(tp->ops->owner);
        kfree(tp);
}

void tcf_destroy_chain(struct tcf_proto **fl)
{
        struct tcf_proto *tp;

        while ((tp = *fl) != NULL) {
                *fl = tp->next;
                tcf_destroy(tp);
        }
}
EXPORT_SYMBOL(tcf_destroy_chain);

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
        struct timespec ts;

        hrtimer_get_res(CLOCK_MONOTONIC, &ts);
        seq_printf(seq, "%08x %08x %08x %08x\n",
                   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
                   1000000,
                   (u32)NSEC_PER_SEC / (u32)ktime_to_ns(timespec_to_ktime(ts)));

        return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
        return single_open(file, psched_show, PDE(inode)->data);
}

static const struct file_operations psched_fops = {
        .owner   = THIS_MODULE,
        .open    = psched_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};
#endif
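
/*
 * The four hex words written above are read by user space (iproute2's tc
 * core) as clock calibration data: nanoseconds per exported "microsecond"
 * unit, nanoseconds per psched tick, the psched clock resolution
 * (1000000), and the hrtimer clock resolution expressed in Hz.
 */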

static int __init pktsched_init(void)
{
        register_qdisc(&pfifo_qdisc_ops);
        register_qdisc(&bfifo_qdisc_ops);
        proc_net_fops_create(&init_net, "psched", 0, &psched_fops);

        rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
        rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
        rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
        rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

        return 0;
}

subsys_initcall(pktsched_init);