2 * net/sched/sch_api.c Packet scheduler API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
33 #include <net/net_namespace.h>
35 #include <net/netlink.h>
36 #include <net/pkt_sched.h>
38 static int qdisc_notify(struct net
*net
, struct sk_buff
*oskb
,
39 struct nlmsghdr
*n
, u32 clid
,
40 struct Qdisc
*old
, struct Qdisc
*new);
41 static int tclass_notify(struct net
*net
, struct sk_buff
*oskb
,
42 struct nlmsghdr
*n
, struct Qdisc
*q
,
43 unsigned long cl
, int event
);
50 This file consists of two interrelated parts:
52 1. queueing disciplines manager frontend.
53 2. traffic classes manager frontend.
55 Generally, queueing discipline ("qdisc") is a black box,
56 which is able to enqueue packets and to dequeue them (when
57 device is ready to send something) in order and at times
58 determined by algorithm hidden in it.
60 qdisc's are divided to two categories:
61 - "queues", which have no internal structure visible from outside.
62 - "schedulers", which split all the packets to "traffic classes",
63 using "packet classifiers" (look at cls_api.c)
65 In turn, classes may have child qdiscs (as rule, queues)
66 attached to them etc. etc. etc.
68 The goal of the routines in this file is to translate
69 information supplied by user in the form of handles
70 to more intelligible for kernel form, to make some sanity
71 checks and part of work, which is common to all qdiscs
72 and to provide rtnetlink notifications.
74 All real intelligent work is done inside qdisc modules.
78 Every discipline has two major routines: enqueue and dequeue.
82 dequeue usually returns a skb to send. It is allowed to return NULL,
83 but it does not mean that queue is empty, it just means that
84 discipline does not want to send anything this time.
85 Queue is really empty if q->q.qlen == 0.
86 For complicated disciplines with multiple queues q->q is not
87 real packet queue, but however q->q.qlen must be valid.
91 enqueue returns 0, if packet was enqueued successfully.
92 If packet (this one or another one) was dropped, it returns
94 NET_XMIT_DROP - this packet dropped
95 Expected action: do not backoff, but wait until queue will clear.
96 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
97 Expected action: backoff or ignore
98 NET_XMIT_POLICED - dropped by police.
99 Expected action: backoff or error to real-time apps.
105 like dequeue but without removing a packet from the queue
109 returns qdisc to initial state: purge all buffers, clear all
110 timers, counters (except for statistics) etc.
114 initializes newly created qdisc.
118 destroys resources allocated by init and during lifetime of qdisc.
122 changes qdisc parameters.
125 /* Protects list of registered TC modules. It is pure SMP lock. */
126 static DEFINE_RWLOCK(qdisc_mod_lock
);
129 /************************************************
130 * Queueing disciplines manipulation. *
131 ************************************************/
134 /* The list of all installed queueing disciplines. */
136 static struct Qdisc_ops
*qdisc_base
;
138 /* Register/unregister queueing discipline */
140 int register_qdisc(struct Qdisc_ops
*qops
)
142 struct Qdisc_ops
*q
, **qp
;
145 write_lock(&qdisc_mod_lock
);
146 for (qp
= &qdisc_base
; (q
= *qp
) != NULL
; qp
= &q
->next
)
147 if (!strcmp(qops
->id
, q
->id
))
150 if (qops
->enqueue
== NULL
)
151 qops
->enqueue
= noop_qdisc_ops
.enqueue
;
152 if (qops
->peek
== NULL
) {
153 if (qops
->dequeue
== NULL
)
154 qops
->peek
= noop_qdisc_ops
.peek
;
158 if (qops
->dequeue
== NULL
)
159 qops
->dequeue
= noop_qdisc_ops
.dequeue
;
162 const struct Qdisc_class_ops
*cops
= qops
->cl_ops
;
164 if (!(cops
->get
&& cops
->put
&& cops
->walk
&& cops
->leaf
))
167 if (cops
->tcf_chain
&& !(cops
->bind_tcf
&& cops
->unbind_tcf
))
175 write_unlock(&qdisc_mod_lock
);
182 EXPORT_SYMBOL(register_qdisc
);
184 int unregister_qdisc(struct Qdisc_ops
*qops
)
186 struct Qdisc_ops
*q
, **qp
;
189 write_lock(&qdisc_mod_lock
);
190 for (qp
= &qdisc_base
; (q
= *qp
) != NULL
; qp
= &q
->next
)
198 write_unlock(&qdisc_mod_lock
);
201 EXPORT_SYMBOL(unregister_qdisc
);
203 /* Get default qdisc if not otherwise specified */
204 void qdisc_get_default(char *name
, size_t len
)
206 read_lock(&qdisc_mod_lock
);
207 strlcpy(name
, default_qdisc_ops
->id
, len
);
208 read_unlock(&qdisc_mod_lock
);
211 static struct Qdisc_ops
*qdisc_lookup_default(const char *name
)
213 struct Qdisc_ops
*q
= NULL
;
215 for (q
= qdisc_base
; q
; q
= q
->next
) {
216 if (!strcmp(name
, q
->id
)) {
217 if (!try_module_get(q
->owner
))
226 /* Set new default qdisc to use */
227 int qdisc_set_default(const char *name
)
229 const struct Qdisc_ops
*ops
;
231 if (!capable(CAP_NET_ADMIN
))
234 write_lock(&qdisc_mod_lock
);
235 ops
= qdisc_lookup_default(name
);
237 /* Not found, drop lock and try to load module */
238 write_unlock(&qdisc_mod_lock
);
239 request_module("sch_%s", name
);
240 write_lock(&qdisc_mod_lock
);
242 ops
= qdisc_lookup_default(name
);
246 /* Set new default */
247 module_put(default_qdisc_ops
->owner
);
248 default_qdisc_ops
= ops
;
250 write_unlock(&qdisc_mod_lock
);
252 return ops
? 0 : -ENOENT
;
255 /* We know handle. Find qdisc among all qdisc's attached to device
256 (root qdisc, all its children, children of children etc.)
259 static struct Qdisc
*qdisc_match_from_root(struct Qdisc
*root
, u32 handle
)
263 if (!(root
->flags
& TCQ_F_BUILTIN
) &&
264 root
->handle
== handle
)
267 list_for_each_entry(q
, &root
->list
, list
) {
268 if (q
->handle
== handle
)
274 void qdisc_list_add(struct Qdisc
*q
)
276 struct Qdisc
*root
= qdisc_dev(q
)->qdisc
;
278 WARN_ON_ONCE(root
== &noop_qdisc
);
279 if ((q
->parent
!= TC_H_ROOT
) && !(q
->flags
& TCQ_F_INGRESS
))
280 list_add_tail(&q
->list
, &root
->list
);
282 EXPORT_SYMBOL(qdisc_list_add
);
284 void qdisc_list_del(struct Qdisc
*q
)
286 if ((q
->parent
!= TC_H_ROOT
) && !(q
->flags
& TCQ_F_INGRESS
))
289 EXPORT_SYMBOL(qdisc_list_del
);
291 struct Qdisc
*qdisc_lookup(struct net_device
*dev
, u32 handle
)
295 q
= qdisc_match_from_root(dev
->qdisc
, handle
);
299 if (dev_ingress_queue(dev
))
300 q
= qdisc_match_from_root(
301 dev_ingress_queue(dev
)->qdisc_sleeping
,
307 static struct Qdisc
*qdisc_leaf(struct Qdisc
*p
, u32 classid
)
311 const struct Qdisc_class_ops
*cops
= p
->ops
->cl_ops
;
315 cl
= cops
->get(p
, classid
);
319 leaf
= cops
->leaf(p
, cl
);
324 /* Find queueing discipline by name */
326 static struct Qdisc_ops
*qdisc_lookup_ops(struct nlattr
*kind
)
328 struct Qdisc_ops
*q
= NULL
;
331 read_lock(&qdisc_mod_lock
);
332 for (q
= qdisc_base
; q
; q
= q
->next
) {
333 if (nla_strcmp(kind
, q
->id
) == 0) {
334 if (!try_module_get(q
->owner
))
339 read_unlock(&qdisc_mod_lock
);
344 /* The linklayer setting were not transferred from iproute2, in older
345 * versions, and the rate tables lookup systems have been dropped in
346 * the kernel. To keep backward compatible with older iproute2 tc
347 * utils, we detect the linklayer setting by detecting if the rate
348 * table were modified.
350 * For linklayer ATM table entries, the rate table will be aligned to
351 * 48 bytes, thus some table entries will contain the same value. The
352 * mpu (min packet unit) is also encoded into the old rate table, thus
353 * starting from the mpu, we find low and high table entries for
354 * mapping this cell. If these entries contain the same value, when
355 * the rate tables have been modified for linklayer ATM.
357 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
358 * and then roundup to the next cell, calc the table entry one below,
361 static __u8
__detect_linklayer(struct tc_ratespec
*r
, __u32
*rtab
)
363 int low
= roundup(r
->mpu
, 48);
364 int high
= roundup(low
+1, 48);
365 int cell_low
= low
>> r
->cell_log
;
366 int cell_high
= (high
>> r
->cell_log
) - 1;
368 /* rtab is too inaccurate at rates > 100Mbit/s */
369 if ((r
->rate
> (100000000/8)) || (rtab
[0] == 0)) {
370 pr_debug("TC linklayer: Giving up ATM detection\n");
371 return TC_LINKLAYER_ETHERNET
;
374 if ((cell_high
> cell_low
) && (cell_high
< 256)
375 && (rtab
[cell_low
] == rtab
[cell_high
])) {
376 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
377 cell_low
, cell_high
, rtab
[cell_high
]);
378 return TC_LINKLAYER_ATM
;
380 return TC_LINKLAYER_ETHERNET
;
383 static struct qdisc_rate_table
*qdisc_rtab_list
;
385 struct qdisc_rate_table
*qdisc_get_rtab(struct tc_ratespec
*r
, struct nlattr
*tab
)
387 struct qdisc_rate_table
*rtab
;
389 if (tab
== NULL
|| r
->rate
== 0 || r
->cell_log
== 0 ||
390 nla_len(tab
) != TC_RTAB_SIZE
)
393 for (rtab
= qdisc_rtab_list
; rtab
; rtab
= rtab
->next
) {
394 if (!memcmp(&rtab
->rate
, r
, sizeof(struct tc_ratespec
)) &&
395 !memcmp(&rtab
->data
, nla_data(tab
), 1024)) {
401 rtab
= kmalloc(sizeof(*rtab
), GFP_KERNEL
);
405 memcpy(rtab
->data
, nla_data(tab
), 1024);
406 if (r
->linklayer
== TC_LINKLAYER_UNAWARE
)
407 r
->linklayer
= __detect_linklayer(r
, rtab
->data
);
408 rtab
->next
= qdisc_rtab_list
;
409 qdisc_rtab_list
= rtab
;
413 EXPORT_SYMBOL(qdisc_get_rtab
);
415 void qdisc_put_rtab(struct qdisc_rate_table
*tab
)
417 struct qdisc_rate_table
*rtab
, **rtabp
;
419 if (!tab
|| --tab
->refcnt
)
422 for (rtabp
= &qdisc_rtab_list
;
423 (rtab
= *rtabp
) != NULL
;
424 rtabp
= &rtab
->next
) {
432 EXPORT_SYMBOL(qdisc_put_rtab
);
434 static LIST_HEAD(qdisc_stab_list
);
435 static DEFINE_SPINLOCK(qdisc_stab_lock
);
437 static const struct nla_policy stab_policy
[TCA_STAB_MAX
+ 1] = {
438 [TCA_STAB_BASE
] = { .len
= sizeof(struct tc_sizespec
) },
439 [TCA_STAB_DATA
] = { .type
= NLA_BINARY
},
442 static struct qdisc_size_table
*qdisc_get_stab(struct nlattr
*opt
)
444 struct nlattr
*tb
[TCA_STAB_MAX
+ 1];
445 struct qdisc_size_table
*stab
;
446 struct tc_sizespec
*s
;
447 unsigned int tsize
= 0;
451 err
= nla_parse_nested(tb
, TCA_STAB_MAX
, opt
, stab_policy
);
454 if (!tb
[TCA_STAB_BASE
])
455 return ERR_PTR(-EINVAL
);
457 s
= nla_data(tb
[TCA_STAB_BASE
]);
460 if (!tb
[TCA_STAB_DATA
])
461 return ERR_PTR(-EINVAL
);
462 tab
= nla_data(tb
[TCA_STAB_DATA
]);
463 tsize
= nla_len(tb
[TCA_STAB_DATA
]) / sizeof(u16
);
466 if (tsize
!= s
->tsize
|| (!tab
&& tsize
> 0))
467 return ERR_PTR(-EINVAL
);
469 spin_lock(&qdisc_stab_lock
);
471 list_for_each_entry(stab
, &qdisc_stab_list
, list
) {
472 if (memcmp(&stab
->szopts
, s
, sizeof(*s
)))
474 if (tsize
> 0 && memcmp(stab
->data
, tab
, tsize
* sizeof(u16
)))
477 spin_unlock(&qdisc_stab_lock
);
481 spin_unlock(&qdisc_stab_lock
);
483 stab
= kmalloc(sizeof(*stab
) + tsize
* sizeof(u16
), GFP_KERNEL
);
485 return ERR_PTR(-ENOMEM
);
490 memcpy(stab
->data
, tab
, tsize
* sizeof(u16
));
492 spin_lock(&qdisc_stab_lock
);
493 list_add_tail(&stab
->list
, &qdisc_stab_list
);
494 spin_unlock(&qdisc_stab_lock
);
499 static void stab_kfree_rcu(struct rcu_head
*head
)
501 kfree(container_of(head
, struct qdisc_size_table
, rcu
));
504 void qdisc_put_stab(struct qdisc_size_table
*tab
)
509 spin_lock(&qdisc_stab_lock
);
511 if (--tab
->refcnt
== 0) {
512 list_del(&tab
->list
);
513 call_rcu_bh(&tab
->rcu
, stab_kfree_rcu
);
516 spin_unlock(&qdisc_stab_lock
);
518 EXPORT_SYMBOL(qdisc_put_stab
);
520 static int qdisc_dump_stab(struct sk_buff
*skb
, struct qdisc_size_table
*stab
)
524 nest
= nla_nest_start(skb
, TCA_STAB
);
526 goto nla_put_failure
;
527 if (nla_put(skb
, TCA_STAB_BASE
, sizeof(stab
->szopts
), &stab
->szopts
))
528 goto nla_put_failure
;
529 nla_nest_end(skb
, nest
);
537 void __qdisc_calculate_pkt_len(struct sk_buff
*skb
, const struct qdisc_size_table
*stab
)
541 pkt_len
= skb
->len
+ stab
->szopts
.overhead
;
542 if (unlikely(!stab
->szopts
.tsize
))
545 slot
= pkt_len
+ stab
->szopts
.cell_align
;
546 if (unlikely(slot
< 0))
549 slot
>>= stab
->szopts
.cell_log
;
550 if (likely(slot
< stab
->szopts
.tsize
))
551 pkt_len
= stab
->data
[slot
];
553 pkt_len
= stab
->data
[stab
->szopts
.tsize
- 1] *
554 (slot
/ stab
->szopts
.tsize
) +
555 stab
->data
[slot
% stab
->szopts
.tsize
];
557 pkt_len
<<= stab
->szopts
.size_log
;
559 if (unlikely(pkt_len
< 1))
561 qdisc_skb_cb(skb
)->pkt_len
= pkt_len
;
563 EXPORT_SYMBOL(__qdisc_calculate_pkt_len
);
565 void qdisc_warn_nonwc(char *txt
, struct Qdisc
*qdisc
)
567 if (!(qdisc
->flags
& TCQ_F_WARN_NONWC
)) {
568 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
569 txt
, qdisc
->ops
->id
, qdisc
->handle
>> 16);
570 qdisc
->flags
|= TCQ_F_WARN_NONWC
;
573 EXPORT_SYMBOL(qdisc_warn_nonwc
);
575 static enum hrtimer_restart
qdisc_watchdog(struct hrtimer
*timer
)
577 struct qdisc_watchdog
*wd
= container_of(timer
, struct qdisc_watchdog
,
580 qdisc_unthrottled(wd
->qdisc
);
581 __netif_schedule(qdisc_root(wd
->qdisc
));
583 return HRTIMER_NORESTART
;
586 void qdisc_watchdog_init(struct qdisc_watchdog
*wd
, struct Qdisc
*qdisc
)
588 hrtimer_init(&wd
->timer
, CLOCK_MONOTONIC
, HRTIMER_MODE_ABS
);
589 wd
->timer
.function
= qdisc_watchdog
;
592 EXPORT_SYMBOL(qdisc_watchdog_init
);
594 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog
*wd
, u64 expires
)
596 if (test_bit(__QDISC_STATE_DEACTIVATED
,
597 &qdisc_root_sleeping(wd
->qdisc
)->state
))
600 qdisc_throttled(wd
->qdisc
);
602 hrtimer_start(&wd
->timer
,
603 ns_to_ktime(expires
),
606 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns
);
608 void qdisc_watchdog_cancel(struct qdisc_watchdog
*wd
)
610 hrtimer_cancel(&wd
->timer
);
611 qdisc_unthrottled(wd
->qdisc
);
613 EXPORT_SYMBOL(qdisc_watchdog_cancel
);
615 static struct hlist_head
*qdisc_class_hash_alloc(unsigned int n
)
617 unsigned int size
= n
* sizeof(struct hlist_head
), i
;
618 struct hlist_head
*h
;
620 if (size
<= PAGE_SIZE
)
621 h
= kmalloc(size
, GFP_KERNEL
);
623 h
= (struct hlist_head
*)
624 __get_free_pages(GFP_KERNEL
, get_order(size
));
627 for (i
= 0; i
< n
; i
++)
628 INIT_HLIST_HEAD(&h
[i
]);
633 static void qdisc_class_hash_free(struct hlist_head
*h
, unsigned int n
)
635 unsigned int size
= n
* sizeof(struct hlist_head
);
637 if (size
<= PAGE_SIZE
)
640 free_pages((unsigned long)h
, get_order(size
));
643 void qdisc_class_hash_grow(struct Qdisc
*sch
, struct Qdisc_class_hash
*clhash
)
645 struct Qdisc_class_common
*cl
;
646 struct hlist_node
*next
;
647 struct hlist_head
*nhash
, *ohash
;
648 unsigned int nsize
, nmask
, osize
;
651 /* Rehash when load factor exceeds 0.75 */
652 if (clhash
->hashelems
* 4 <= clhash
->hashsize
* 3)
654 nsize
= clhash
->hashsize
* 2;
656 nhash
= qdisc_class_hash_alloc(nsize
);
660 ohash
= clhash
->hash
;
661 osize
= clhash
->hashsize
;
664 for (i
= 0; i
< osize
; i
++) {
665 hlist_for_each_entry_safe(cl
, next
, &ohash
[i
], hnode
) {
666 h
= qdisc_class_hash(cl
->classid
, nmask
);
667 hlist_add_head(&cl
->hnode
, &nhash
[h
]);
670 clhash
->hash
= nhash
;
671 clhash
->hashsize
= nsize
;
672 clhash
->hashmask
= nmask
;
673 sch_tree_unlock(sch
);
675 qdisc_class_hash_free(ohash
, osize
);
677 EXPORT_SYMBOL(qdisc_class_hash_grow
);
679 int qdisc_class_hash_init(struct Qdisc_class_hash
*clhash
)
681 unsigned int size
= 4;
683 clhash
->hash
= qdisc_class_hash_alloc(size
);
684 if (clhash
->hash
== NULL
)
686 clhash
->hashsize
= size
;
687 clhash
->hashmask
= size
- 1;
688 clhash
->hashelems
= 0;
691 EXPORT_SYMBOL(qdisc_class_hash_init
);
693 void qdisc_class_hash_destroy(struct Qdisc_class_hash
*clhash
)
695 qdisc_class_hash_free(clhash
->hash
, clhash
->hashsize
);
697 EXPORT_SYMBOL(qdisc_class_hash_destroy
);
699 void qdisc_class_hash_insert(struct Qdisc_class_hash
*clhash
,
700 struct Qdisc_class_common
*cl
)
704 INIT_HLIST_NODE(&cl
->hnode
);
705 h
= qdisc_class_hash(cl
->classid
, clhash
->hashmask
);
706 hlist_add_head(&cl
->hnode
, &clhash
->hash
[h
]);
709 EXPORT_SYMBOL(qdisc_class_hash_insert
);
711 void qdisc_class_hash_remove(struct Qdisc_class_hash
*clhash
,
712 struct Qdisc_class_common
*cl
)
714 hlist_del(&cl
->hnode
);
717 EXPORT_SYMBOL(qdisc_class_hash_remove
);
719 /* Allocate an unique handle from space managed by kernel
720 * Possible range is [8000-FFFF]:0000 (0x8000 values)
722 static u32
qdisc_alloc_handle(struct net_device
*dev
)
725 static u32 autohandle
= TC_H_MAKE(0x80000000U
, 0);
728 autohandle
+= TC_H_MAKE(0x10000U
, 0);
729 if (autohandle
== TC_H_MAKE(TC_H_ROOT
, 0))
730 autohandle
= TC_H_MAKE(0x80000000U
, 0);
731 if (!qdisc_lookup(dev
, autohandle
))
739 void qdisc_tree_decrease_qlen(struct Qdisc
*sch
, unsigned int n
)
741 const struct Qdisc_class_ops
*cops
;
748 drops
= max_t(int, n
, 0);
749 while ((parentid
= sch
->parent
)) {
750 if (TC_H_MAJ(parentid
) == TC_H_MAJ(TC_H_INGRESS
))
753 sch
= qdisc_lookup(qdisc_dev(sch
), TC_H_MAJ(parentid
));
755 WARN_ON(parentid
!= TC_H_ROOT
);
758 cops
= sch
->ops
->cl_ops
;
759 if (cops
->qlen_notify
) {
760 cl
= cops
->get(sch
, parentid
);
761 cops
->qlen_notify(sch
, cl
);
765 sch
->qstats
.drops
+= drops
;
768 EXPORT_SYMBOL(qdisc_tree_decrease_qlen
);
770 static void notify_and_destroy(struct net
*net
, struct sk_buff
*skb
,
771 struct nlmsghdr
*n
, u32 clid
,
772 struct Qdisc
*old
, struct Qdisc
*new)
775 qdisc_notify(net
, skb
, n
, clid
, old
, new);
781 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
784 * When appropriate send a netlink notification using 'skb'
787 * On success, destroy old qdisc.
790 static int qdisc_graft(struct net_device
*dev
, struct Qdisc
*parent
,
791 struct sk_buff
*skb
, struct nlmsghdr
*n
, u32 classid
,
792 struct Qdisc
*new, struct Qdisc
*old
)
794 struct Qdisc
*q
= old
;
795 struct net
*net
= dev_net(dev
);
798 if (parent
== NULL
) {
799 unsigned int i
, num_q
, ingress
;
802 num_q
= dev
->num_tx_queues
;
803 if ((q
&& q
->flags
& TCQ_F_INGRESS
) ||
804 (new && new->flags
& TCQ_F_INGRESS
)) {
807 if (!dev_ingress_queue(dev
))
811 if (dev
->flags
& IFF_UP
)
814 if (new && new->ops
->attach
) {
815 new->ops
->attach(new);
819 for (i
= 0; i
< num_q
; i
++) {
820 struct netdev_queue
*dev_queue
= dev_ingress_queue(dev
);
823 dev_queue
= netdev_get_tx_queue(dev
, i
);
825 old
= dev_graft_qdisc(dev_queue
, new);
827 atomic_inc(&new->refcnt
);
834 notify_and_destroy(net
, skb
, n
, classid
,
836 if (new && !new->ops
->attach
)
837 atomic_inc(&new->refcnt
);
838 dev
->qdisc
= new ? : &noop_qdisc
;
840 notify_and_destroy(net
, skb
, n
, classid
, old
, new);
843 if (dev
->flags
& IFF_UP
)
846 const struct Qdisc_class_ops
*cops
= parent
->ops
->cl_ops
;
849 if (cops
&& cops
->graft
) {
850 unsigned long cl
= cops
->get(parent
, classid
);
852 err
= cops
->graft(parent
, cl
, new, &old
);
853 cops
->put(parent
, cl
);
858 notify_and_destroy(net
, skb
, n
, classid
, old
, new);
863 /* lockdep annotation is needed for ingress; egress gets it only for name */
864 static struct lock_class_key qdisc_tx_lock
;
865 static struct lock_class_key qdisc_rx_lock
;
868 Allocate and initialize new qdisc.
870 Parameters are passed via opt.
873 static struct Qdisc
*
874 qdisc_create(struct net_device
*dev
, struct netdev_queue
*dev_queue
,
875 struct Qdisc
*p
, u32 parent
, u32 handle
,
876 struct nlattr
**tca
, int *errp
)
879 struct nlattr
*kind
= tca
[TCA_KIND
];
881 struct Qdisc_ops
*ops
;
882 struct qdisc_size_table
*stab
;
884 ops
= qdisc_lookup_ops(kind
);
885 #ifdef CONFIG_MODULES
886 if (ops
== NULL
&& kind
!= NULL
) {
888 if (nla_strlcpy(name
, kind
, IFNAMSIZ
) < IFNAMSIZ
) {
889 /* We dropped the RTNL semaphore in order to
890 * perform the module load. So, even if we
891 * succeeded in loading the module we have to
892 * tell the caller to replay the request. We
893 * indicate this using -EAGAIN.
894 * We replay the request because the device may
895 * go away in the mean time.
898 request_module("sch_%s", name
);
900 ops
= qdisc_lookup_ops(kind
);
902 /* We will try again qdisc_lookup_ops,
903 * so don't keep a reference.
905 module_put(ops
->owner
);
917 sch
= qdisc_alloc(dev_queue
, ops
);
923 sch
->parent
= parent
;
925 if (handle
== TC_H_INGRESS
) {
926 sch
->flags
|= TCQ_F_INGRESS
;
927 handle
= TC_H_MAKE(TC_H_INGRESS
, 0);
928 lockdep_set_class(qdisc_lock(sch
), &qdisc_rx_lock
);
931 handle
= qdisc_alloc_handle(dev
);
936 lockdep_set_class(qdisc_lock(sch
), &qdisc_tx_lock
);
937 if (!netif_is_multiqueue(dev
))
938 sch
->flags
|= TCQ_F_ONETXQUEUE
;
941 sch
->handle
= handle
;
943 if (!ops
->init
|| (err
= ops
->init(sch
, tca
[TCA_OPTIONS
])) == 0) {
945 stab
= qdisc_get_stab(tca
[TCA_STAB
]);
950 rcu_assign_pointer(sch
->stab
, stab
);
953 spinlock_t
*root_lock
;
956 if (sch
->flags
& TCQ_F_MQROOT
)
959 if ((sch
->parent
!= TC_H_ROOT
) &&
960 !(sch
->flags
& TCQ_F_INGRESS
) &&
961 (!p
|| !(p
->flags
& TCQ_F_MQROOT
)))
962 root_lock
= qdisc_root_sleeping_lock(sch
);
964 root_lock
= qdisc_lock(sch
);
966 err
= gen_new_estimator(&sch
->bstats
, &sch
->rate_est
,
967 root_lock
, tca
[TCA_RATE
]);
978 kfree((char *) sch
- sch
->padded
);
980 module_put(ops
->owner
);
987 * Any broken qdiscs that would require a ops->reset() here?
988 * The qdisc was never in action so it shouldn't be necessary.
990 qdisc_put_stab(rtnl_dereference(sch
->stab
));
996 static int qdisc_change(struct Qdisc
*sch
, struct nlattr
**tca
)
998 struct qdisc_size_table
*ostab
, *stab
= NULL
;
1001 if (tca
[TCA_OPTIONS
]) {
1002 if (sch
->ops
->change
== NULL
)
1004 err
= sch
->ops
->change(sch
, tca
[TCA_OPTIONS
]);
1009 if (tca
[TCA_STAB
]) {
1010 stab
= qdisc_get_stab(tca
[TCA_STAB
]);
1012 return PTR_ERR(stab
);
1015 ostab
= rtnl_dereference(sch
->stab
);
1016 rcu_assign_pointer(sch
->stab
, stab
);
1017 qdisc_put_stab(ostab
);
1019 if (tca
[TCA_RATE
]) {
1020 /* NB: ignores errors from replace_estimator
1021 because change can't be undone. */
1022 if (sch
->flags
& TCQ_F_MQROOT
)
1024 gen_replace_estimator(&sch
->bstats
, &sch
->rate_est
,
1025 qdisc_root_sleeping_lock(sch
),
1032 struct check_loop_arg
{
1033 struct qdisc_walker w
;
1038 static int check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
);
1040 static int check_loop(struct Qdisc
*q
, struct Qdisc
*p
, int depth
)
1042 struct check_loop_arg arg
;
1044 if (q
->ops
->cl_ops
== NULL
)
1047 arg
.w
.stop
= arg
.w
.skip
= arg
.w
.count
= 0;
1048 arg
.w
.fn
= check_loop_fn
;
1051 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
1052 return arg
.w
.stop
? -ELOOP
: 0;
1056 check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
)
1059 const struct Qdisc_class_ops
*cops
= q
->ops
->cl_ops
;
1060 struct check_loop_arg
*arg
= (struct check_loop_arg
*)w
;
1062 leaf
= cops
->leaf(q
, cl
);
1064 if (leaf
== arg
->p
|| arg
->depth
> 7)
1066 return check_loop(leaf
, arg
->p
, arg
->depth
+ 1);
1075 static int tc_get_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
)
1077 struct net
*net
= sock_net(skb
->sk
);
1078 struct tcmsg
*tcm
= nlmsg_data(n
);
1079 struct nlattr
*tca
[TCA_MAX
+ 1];
1080 struct net_device
*dev
;
1082 struct Qdisc
*q
= NULL
;
1083 struct Qdisc
*p
= NULL
;
1086 if ((n
->nlmsg_type
!= RTM_GETQDISC
) && !capable(CAP_NET_ADMIN
))
1089 err
= nlmsg_parse(n
, sizeof(*tcm
), tca
, TCA_MAX
, NULL
);
1093 dev
= __dev_get_by_index(net
, tcm
->tcm_ifindex
);
1097 clid
= tcm
->tcm_parent
;
1099 if (clid
!= TC_H_ROOT
) {
1100 if (TC_H_MAJ(clid
) != TC_H_MAJ(TC_H_INGRESS
)) {
1101 p
= qdisc_lookup(dev
, TC_H_MAJ(clid
));
1104 q
= qdisc_leaf(p
, clid
);
1105 } else if (dev_ingress_queue(dev
)) {
1106 q
= dev_ingress_queue(dev
)->qdisc_sleeping
;
1114 if (tcm
->tcm_handle
&& q
->handle
!= tcm
->tcm_handle
)
1117 q
= qdisc_lookup(dev
, tcm
->tcm_handle
);
1122 if (tca
[TCA_KIND
] && nla_strcmp(tca
[TCA_KIND
], q
->ops
->id
))
1125 if (n
->nlmsg_type
== RTM_DELQDISC
) {
1130 err
= qdisc_graft(dev
, p
, skb
, n
, clid
, NULL
, q
);
1134 qdisc_notify(net
, skb
, n
, clid
, NULL
, q
);
1140 * Create/change qdisc.
1143 static int tc_modify_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
)
1145 struct net
*net
= sock_net(skb
->sk
);
1147 struct nlattr
*tca
[TCA_MAX
+ 1];
1148 struct net_device
*dev
;
1150 struct Qdisc
*q
, *p
;
1153 if (!capable(CAP_NET_ADMIN
))
1157 /* Reinit, just in case something touches this. */
1158 err
= nlmsg_parse(n
, sizeof(*tcm
), tca
, TCA_MAX
, NULL
);
1162 tcm
= nlmsg_data(n
);
1163 clid
= tcm
->tcm_parent
;
1166 dev
= __dev_get_by_index(net
, tcm
->tcm_ifindex
);
1172 if (clid
!= TC_H_ROOT
) {
1173 if (clid
!= TC_H_INGRESS
) {
1174 p
= qdisc_lookup(dev
, TC_H_MAJ(clid
));
1177 q
= qdisc_leaf(p
, clid
);
1178 } else if (dev_ingress_queue_create(dev
)) {
1179 q
= dev_ingress_queue(dev
)->qdisc_sleeping
;
1185 /* It may be default qdisc, ignore it */
1186 if (q
&& q
->handle
== 0)
1189 if (!q
|| !tcm
->tcm_handle
|| q
->handle
!= tcm
->tcm_handle
) {
1190 if (tcm
->tcm_handle
) {
1191 if (q
&& !(n
->nlmsg_flags
& NLM_F_REPLACE
))
1193 if (TC_H_MIN(tcm
->tcm_handle
))
1195 q
= qdisc_lookup(dev
, tcm
->tcm_handle
);
1197 goto create_n_graft
;
1198 if (n
->nlmsg_flags
& NLM_F_EXCL
)
1200 if (tca
[TCA_KIND
] && nla_strcmp(tca
[TCA_KIND
], q
->ops
->id
))
1203 (p
&& check_loop(q
, p
, 0)))
1205 atomic_inc(&q
->refcnt
);
1209 goto create_n_graft
;
1211 /* This magic test requires explanation.
1213 * We know, that some child q is already
1214 * attached to this parent and have choice:
1215 * either to change it or to create/graft new one.
1217 * 1. We are allowed to create/graft only
1218 * if CREATE and REPLACE flags are set.
1220 * 2. If EXCL is set, requestor wanted to say,
1221 * that qdisc tcm_handle is not expected
1222 * to exist, so that we choose create/graft too.
1224 * 3. The last case is when no flags are set.
1225 * Alas, it is sort of hole in API, we
1226 * cannot decide what to do unambiguously.
1227 * For now we select create/graft, if
1228 * user gave KIND, which does not match existing.
1230 if ((n
->nlmsg_flags
& NLM_F_CREATE
) &&
1231 (n
->nlmsg_flags
& NLM_F_REPLACE
) &&
1232 ((n
->nlmsg_flags
& NLM_F_EXCL
) ||
1234 nla_strcmp(tca
[TCA_KIND
], q
->ops
->id
))))
1235 goto create_n_graft
;
1239 if (!tcm
->tcm_handle
)
1241 q
= qdisc_lookup(dev
, tcm
->tcm_handle
);
1244 /* Change qdisc parameters */
1247 if (n
->nlmsg_flags
& NLM_F_EXCL
)
1249 if (tca
[TCA_KIND
] && nla_strcmp(tca
[TCA_KIND
], q
->ops
->id
))
1251 err
= qdisc_change(q
, tca
);
1253 qdisc_notify(net
, skb
, n
, clid
, NULL
, q
);
1257 if (!(n
->nlmsg_flags
& NLM_F_CREATE
))
1259 if (clid
== TC_H_INGRESS
) {
1260 if (dev_ingress_queue(dev
))
1261 q
= qdisc_create(dev
, dev_ingress_queue(dev
), p
,
1262 tcm
->tcm_parent
, tcm
->tcm_parent
,
1267 struct netdev_queue
*dev_queue
;
1269 if (p
&& p
->ops
->cl_ops
&& p
->ops
->cl_ops
->select_queue
)
1270 dev_queue
= p
->ops
->cl_ops
->select_queue(p
, tcm
);
1272 dev_queue
= p
->dev_queue
;
1274 dev_queue
= netdev_get_tx_queue(dev
, 0);
1276 q
= qdisc_create(dev
, dev_queue
, p
,
1277 tcm
->tcm_parent
, tcm
->tcm_handle
,
1287 err
= qdisc_graft(dev
, p
, skb
, n
, clid
, q
, NULL
);
1297 static int tc_fill_qdisc(struct sk_buff
*skb
, struct Qdisc
*q
, u32 clid
,
1298 u32 portid
, u32 seq
, u16 flags
, int event
)
1301 struct nlmsghdr
*nlh
;
1302 unsigned char *b
= skb_tail_pointer(skb
);
1304 struct qdisc_size_table
*stab
;
1306 nlh
= nlmsg_put(skb
, portid
, seq
, event
, sizeof(*tcm
), flags
);
1308 goto out_nlmsg_trim
;
1309 tcm
= nlmsg_data(nlh
);
1310 tcm
->tcm_family
= AF_UNSPEC
;
1313 tcm
->tcm_ifindex
= qdisc_dev(q
)->ifindex
;
1314 tcm
->tcm_parent
= clid
;
1315 tcm
->tcm_handle
= q
->handle
;
1316 tcm
->tcm_info
= atomic_read(&q
->refcnt
);
1317 if (nla_put_string(skb
, TCA_KIND
, q
->ops
->id
))
1318 goto nla_put_failure
;
1319 if (q
->ops
->dump
&& q
->ops
->dump(q
, skb
) < 0)
1320 goto nla_put_failure
;
1321 q
->qstats
.qlen
= q
->q
.qlen
;
1323 stab
= rtnl_dereference(q
->stab
);
1324 if (stab
&& qdisc_dump_stab(skb
, stab
) < 0)
1325 goto nla_put_failure
;
1327 if (gnet_stats_start_copy_compat(skb
, TCA_STATS2
, TCA_STATS
, TCA_XSTATS
,
1328 qdisc_root_sleeping_lock(q
), &d
) < 0)
1329 goto nla_put_failure
;
1331 if (q
->ops
->dump_stats
&& q
->ops
->dump_stats(q
, &d
) < 0)
1332 goto nla_put_failure
;
1334 if (gnet_stats_copy_basic(&d
, &q
->bstats
) < 0 ||
1335 gnet_stats_copy_rate_est(&d
, &q
->bstats
, &q
->rate_est
) < 0 ||
1336 gnet_stats_copy_queue(&d
, &q
->qstats
) < 0)
1337 goto nla_put_failure
;
1339 if (gnet_stats_finish_copy(&d
) < 0)
1340 goto nla_put_failure
;
1342 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - b
;
1351 static bool tc_qdisc_dump_ignore(struct Qdisc
*q
)
1353 return (q
->flags
& TCQ_F_BUILTIN
) ? true : false;
1356 static int qdisc_notify(struct net
*net
, struct sk_buff
*oskb
,
1357 struct nlmsghdr
*n
, u32 clid
,
1358 struct Qdisc
*old
, struct Qdisc
*new)
1360 struct sk_buff
*skb
;
1361 u32 portid
= oskb
? NETLINK_CB(oskb
).portid
: 0;
1363 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1367 if (old
&& !tc_qdisc_dump_ignore(old
)) {
1368 if (tc_fill_qdisc(skb
, old
, clid
, portid
, n
->nlmsg_seq
,
1369 0, RTM_DELQDISC
) < 0)
1372 if (new && !tc_qdisc_dump_ignore(new)) {
1373 if (tc_fill_qdisc(skb
, new, clid
, portid
, n
->nlmsg_seq
,
1374 old
? NLM_F_REPLACE
: 0, RTM_NEWQDISC
) < 0)
1379 return rtnetlink_send(skb
, net
, portid
, RTNLGRP_TC
,
1380 n
->nlmsg_flags
& NLM_F_ECHO
);
1387 static int tc_dump_qdisc_root(struct Qdisc
*root
, struct sk_buff
*skb
,
1388 struct netlink_callback
*cb
,
1389 int *q_idx_p
, int s_q_idx
)
1391 int ret
= 0, q_idx
= *q_idx_p
;
1398 if (q_idx
< s_q_idx
) {
1401 if (!tc_qdisc_dump_ignore(q
) &&
1402 tc_fill_qdisc(skb
, q
, q
->parent
, NETLINK_CB(cb
->skb
).portid
,
1403 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWQDISC
) <= 0)
1407 list_for_each_entry(q
, &root
->list
, list
) {
1408 if (q_idx
< s_q_idx
) {
1412 if (!tc_qdisc_dump_ignore(q
) &&
1413 tc_fill_qdisc(skb
, q
, q
->parent
, NETLINK_CB(cb
->skb
).portid
,
1414 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWQDISC
) <= 0)
1427 static int tc_dump_qdisc(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1429 struct net
*net
= sock_net(skb
->sk
);
1432 struct net_device
*dev
;
1434 s_idx
= cb
->args
[0];
1435 s_q_idx
= q_idx
= cb
->args
[1];
1439 for_each_netdev(net
, dev
) {
1440 struct netdev_queue
*dev_queue
;
1448 if (tc_dump_qdisc_root(dev
->qdisc
, skb
, cb
, &q_idx
, s_q_idx
) < 0)
1451 dev_queue
= dev_ingress_queue(dev
);
1453 tc_dump_qdisc_root(dev_queue
->qdisc_sleeping
, skb
, cb
,
1454 &q_idx
, s_q_idx
) < 0)
1463 cb
->args
[1] = q_idx
;
1470 /************************************************
1471 * Traffic classes manipulation. *
1472 ************************************************/
/*
 * tc_ctl_tclass() -- RTM_{NEW,DEL,GET}TCLASS handler.  Requires
 * CAP_NET_ADMIN for anything but GET, parses the TCA_* attributes,
 * resolves the owning qdisc from tcm_parent/tcm_handle, looks the
 * class up through the qdisc's class ops, then dispatches to
 * ->delete()/->change() and reports the result to userspace via
 * tclass_notify().
 * NOTE(review): token-shattered extraction with elided lines (original
 * numbering jumps, e.g. 1494 -> 1498, 1568 -> 1571): error checks,
 * returns and braces are missing from this view -- reconcile with the
 * full file before editing.
 */
1476 static int tc_ctl_tclass(struct sk_buff
*skb
, struct nlmsghdr
*n
)
1478 struct net
*net
= sock_net(skb
->sk
);
1479 struct tcmsg
*tcm
= nlmsg_data(n
);
1480 struct nlattr
*tca
[TCA_MAX
+ 1];
1481 struct net_device
*dev
;
1482 struct Qdisc
*q
= NULL
;
1483 const struct Qdisc_class_ops
*cops
;
1484 unsigned long cl
= 0;
1485 unsigned long new_cl
;
/* only RTM_GETTCLASS is allowed without CAP_NET_ADMIN */
1491 if ((n
->nlmsg_type
!= RTM_GETTCLASS
) && !capable(CAP_NET_ADMIN
))
1494 err
= nlmsg_parse(n
, sizeof(*tcm
), tca
, TCA_MAX
, NULL
);
1498 dev
= __dev_get_by_index(net
, tcm
->tcm_ifindex
);
1503 parent == TC_H_UNSPEC - unspecified parent.
1504 parent == TC_H_ROOT - class is root, which has no parent.
1505 parent == X:0 - parent is root class.
1506 parent == X:Y - parent is a node in hierarchy.
1507 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1509 handle == 0:0 - generate handle from kernel pool.
1510 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1511 handle == X:Y - clear.
1512 handle == X:0 - root class.
1515 /* Step 1. Determine qdisc handle X:0 */
1517 portid
= tcm
->tcm_parent
;
1518 clid
= tcm
->tcm_handle
;
1519 qid
= TC_H_MAJ(clid
);
1521 if (portid
!= TC_H_ROOT
) {
1522 u32 qid1
= TC_H_MAJ(portid
);
1525 /* If both majors are known, they must be identical. */
1530 } else if (qid
== 0)
1531 qid
= dev
->qdisc
->handle
;
1533 /* Now qid is genuine qdisc handle consistent
1534 * both with parent and child.
1536 * TC_H_MAJ(portid) still may be unspecified, complete it now.
1539 portid
= TC_H_MAKE(qid
, portid
);
1542 qid
= dev
->qdisc
->handle
;
1545 /* OK. Locate qdisc */
1546 q
= qdisc_lookup(dev
, qid
);
1550 /* An check that it supports classes */
1551 cops
= q
->ops
->cl_ops
;
1555 /* Now try to get class */
1557 if (portid
== TC_H_ROOT
)
1560 clid
= TC_H_MAKE(qid
, clid
);
1563 cl
= cops
->get(q
, clid
);
/* class does not exist: creation is only legal for
 * RTM_NEWTCLASS with NLM_F_CREATE set */
1567 if (n
->nlmsg_type
!= RTM_NEWTCLASS
||
1568 !(n
->nlmsg_flags
& NLM_F_CREATE
))
1571 switch (n
->nlmsg_type
) {
1574 if (n
->nlmsg_flags
& NLM_F_EXCL
)
1580 err
= cops
->delete(q
, cl
);
1582 tclass_notify(net
, skb
, n
, q
, cl
, RTM_DELTCLASS
);
1585 err
= tclass_notify(net
, skb
, n
, q
, cl
, RTM_NEWTCLASS
);
/* create/replace path: let the qdisc's class ops apply the change */
1596 err
= cops
->change(q
, clid
, portid
, tca
, &new_cl
);
1598 tclass_notify(net
, skb
, n
, q
, new_cl
, RTM_NEWTCLASS
);
/*
 * tc_fill_tclass() -- serialize one traffic class into @skb as a
 * netlink message of type @event: nlmsg header, tcmsg payload,
 * TCA_KIND, the class ops' ->dump() attributes, and the
 * TCA_STATS2/TCA_STATS/TCA_XSTATS statistics block.  On any put
 * failure the message is trimmed back to @b (the tail captured on
 * entry); on success nlmsg_len is patched to the bytes written.
 * NOTE(review): token-shattered extraction with elided lines (original
 * numbering jumps, e.g. 1622 -> 1625, 1644 -> 1653): some
 * declarations (tcm, d, cl parameter), labels and returns are missing
 * from this view -- reconcile with the full file before editing.
 */
1608 static int tc_fill_tclass(struct sk_buff
*skb
, struct Qdisc
*q
,
1610 u32 portid
, u32 seq
, u16 flags
, int event
)
1613 struct nlmsghdr
*nlh
;
/* remember the tail so a failed fill can be trimmed away */
1614 unsigned char *b
= skb_tail_pointer(skb
);
1616 const struct Qdisc_class_ops
*cl_ops
= q
->ops
->cl_ops
;
1618 nlh
= nlmsg_put(skb
, portid
, seq
, event
, sizeof(*tcm
), flags
);
1620 goto out_nlmsg_trim
;
1621 tcm
= nlmsg_data(nlh
);
1622 tcm
->tcm_family
= AF_UNSPEC
;
1625 tcm
->tcm_ifindex
= qdisc_dev(q
)->ifindex
;
1626 tcm
->tcm_parent
= q
->handle
;
1627 tcm
->tcm_handle
= q
->handle
;
1629 if (nla_put_string(skb
, TCA_KIND
, q
->ops
->id
))
1630 goto nla_put_failure
;
/* let the qdisc's class ops append class-specific attributes */
1631 if (cl_ops
->dump
&& cl_ops
->dump(q
, cl
, skb
, tcm
) < 0)
1632 goto nla_put_failure
;
1634 if (gnet_stats_start_copy_compat(skb
, TCA_STATS2
, TCA_STATS
, TCA_XSTATS
,
1635 qdisc_root_sleeping_lock(q
), &d
) < 0)
1636 goto nla_put_failure
;
1638 if (cl_ops
->dump_stats
&& cl_ops
->dump_stats(q
, cl
, &d
) < 0)
1639 goto nla_put_failure
;
1641 if (gnet_stats_finish_copy(&d
) < 0)
1642 goto nla_put_failure
;
/* finalize the message length now that all attributes are in */
1644 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - b
;
/*
 * tclass_notify() -- build a fresh skb describing class @cl of @q
 * (message type @event) and send it to the RTNLGRP_TC multicast group,
 * echoing to the requester when NLM_F_ECHO is set.  The portid is
 * taken from the originating skb when there is one, 0 otherwise.
 * NOTE(review): token-shattered extraction with elided lines (original
 * numbering jumps, e.g. 1660 -> 1664): the allocation-failure check
 * and the fill-failure cleanup (kfree_skb/return) are missing from
 * this view -- reconcile with the full file before editing.
 */
1653 static int tclass_notify(struct net
*net
, struct sk_buff
*oskb
,
1654 struct nlmsghdr
*n
, struct Qdisc
*q
,
1655 unsigned long cl
, int event
)
1657 struct sk_buff
*skb
;
1658 u32 portid
= oskb
? NETLINK_CB(oskb
).portid
: 0;
1660 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1664 if (tc_fill_tclass(skb
, q
, cl
, portid
, n
->nlmsg_seq
, 0, event
) < 0) {
1669 return rtnetlink_send(skb
, net
, portid
, RTNLGRP_TC
,
1670 n
->nlmsg_flags
& NLM_F_ECHO
);
1673 struct qdisc_dump_args
{
1674 struct qdisc_walker w
;
1675 struct sk_buff
*skb
;
1676 struct netlink_callback
*cb
;
1679 static int qdisc_class_dump(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*arg
)
1681 struct qdisc_dump_args
*a
= (struct qdisc_dump_args
*)arg
;
1683 return tc_fill_tclass(a
->skb
, q
, cl
, NETLINK_CB(a
->cb
->skb
).portid
,
1684 a
->cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWTCLASS
);
/*
 * tc_dump_tclass_qdisc() -- dump all classes of one qdisc into @skb.
 * Skips qdiscs that are dump-ignored, not yet reached by the cursor
 * (*t_p < s_t), classless, or not matching the requested tcm_parent
 * major; otherwise sets up a qdisc_dump_args walker (fn =
 * qdisc_class_dump, skip resumed from cb->args[1]) and runs the
 * qdisc's ->walk(), saving the walker count back into cb->args[1].
 * NOTE(review): token-shattered extraction with elided lines (original
 * numbering jumps, e.g. 1702 -> 1706, 1709 -> 1716): the t_p/s_t
 * parameter line, cursor increments and returns are missing from this
 * view -- reconcile with the full file before editing.
 */
1687 static int tc_dump_tclass_qdisc(struct Qdisc
*q
, struct sk_buff
*skb
,
1688 struct tcmsg
*tcm
, struct netlink_callback
*cb
,
1691 struct qdisc_dump_args arg
;
1693 if (tc_qdisc_dump_ignore(q
) ||
1694 *t_p
< s_t
|| !q
->ops
->cl_ops
||
1696 TC_H_MAJ(tcm
->tcm_parent
) != q
->handle
)) {
/* entering a new qdisc: clear the per-qdisc cursors in args[1..] */
1701 memset(&cb
->args
[1], 0, sizeof(cb
->args
)-sizeof(cb
->args
[0]));
1702 arg
.w
.fn
= qdisc_class_dump
;
1706 arg
.w
.skip
= cb
->args
[1];
1708 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
/* remember how many classes were visited so the dump can resume */
1709 cb
->args
[1] = arg
.w
.count
;
/*
 * tc_dump_tclass_root() -- dump the classes of @root and of every
 * qdisc linked on root->list, via tc_dump_tclass_qdisc(); a negative
 * result from any qdisc aborts the dump.
 * NOTE(review): token-shattered extraction with elided lines (original
 * numbering jumps, e.g. 1718 -> 1725): the t_p/s_t parameter line,
 * the NULL-root check, loop-local declaration of q and the returns
 * are missing from this view -- reconcile with the full file first.
 */
1716 static int tc_dump_tclass_root(struct Qdisc
*root
, struct sk_buff
*skb
,
1717 struct tcmsg
*tcm
, struct netlink_callback
*cb
,
1725 if (tc_dump_tclass_qdisc(root
, skb
, tcm
, cb
, t_p
, s_t
) < 0)
1728 list_for_each_entry(q
, &root
->list
, list
) {
1729 if (tc_dump_tclass_qdisc(q
, skb
, tcm
, cb
, t_p
, s_t
) < 0)
/*
 * tc_dump_tclass() -- RTM_GETTCLASS dump handler.  Validates that the
 * request carries a full tcmsg, resolves the target device from
 * tcm_ifindex, then dumps classes from the device's root qdisc tree
 * and from the ingress queue's sleeping qdisc tree via
 * tc_dump_tclass_root().
 * NOTE(review): token-shattered extraction with elided lines (original
 * numbering jumps, e.g. 1746 -> 1753, 1758 -> 1769): the t/s_t cursor
 * handling, dev_put() release and returns are missing from this view
 * -- reconcile with the full file before editing.
 */
1736 static int tc_dump_tclass(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1738 struct tcmsg
*tcm
= nlmsg_data(cb
->nlh
);
1739 struct net
*net
= sock_net(skb
->sk
);
1740 struct netdev_queue
*dev_queue
;
1741 struct net_device
*dev
;
1744 if (nlmsg_len(cb
->nlh
) < sizeof(*tcm
))
1746 dev
= dev_get_by_index(net
, tcm
->tcm_ifindex
);
1753 if (tc_dump_tclass_root(dev
->qdisc
, skb
, tcm
, cb
, &t
, s_t
) < 0)
1756 dev_queue
= dev_ingress_queue(dev
);
1758 tc_dump_tclass_root(dev_queue
->qdisc_sleeping
, skb
, tcm
, cb
,
1769 /* Main classifier routine: scans classifier chain attached
1770 * to this qdisc, (optionally) tests for protocol and asks
1771 * specific classifiers.
/*
 * Walks the tcf_proto chain: filters whose protocol matches the skb
 * (or ETH_P_ALL) get to ->classify(); under CONFIG_NET_CLS_ACT a
 * non-RECLASSIFY verdict clears skb->tc_verd.
 * NOTE(review): token-shattered extraction with elided lines (original
 * numbering jumps, e.g. 1783 -> 1786, 1788 -> 1795): the err>=0
 * verdict return, fall-through continue and final return are missing
 * from this view -- reconcile with the full file before editing.
 */
1773 int tc_classify_compat(struct sk_buff
*skb
, const struct tcf_proto
*tp
,
1774 struct tcf_result
*res
)
1776 __be16 protocol
= skb
->protocol
;
1779 for (; tp
; tp
= tp
->next
) {
/* skip filters bound to a different protocol (ETH_P_ALL matches all) */
1780 if (tp
->protocol
!= protocol
&&
1781 tp
->protocol
!= htons(ETH_P_ALL
))
1783 err
= tp
->classify(skb
, tp
, res
);
1786 #ifdef CONFIG_NET_CLS_ACT
1787 if (err
!= TC_ACT_RECLASSIFY
&& skb
->tc_verd
)
1788 skb
->tc_verd
= SET_TC_VERD(skb
->tc_verd
, 0);
1795 EXPORT_SYMBOL(tc_classify_compat
);
/*
 * tc_classify() -- wrapper around tc_classify_compat() that, under
 * CONFIG_NET_CLS_ACT, handles TC_ACT_RECLASSIFY verdicts: it bumps
 * the reclassify count in skb->tc_verd, rate-limit-logs and bails out
 * once MAX_REC_LOOP is exceeded, otherwise retries classification
 * (from the saved original chain head 'otp').
 * NOTE(review): token-shattered extraction with elided lines (original
 * numbering jumps, e.g. 1809 -> 1812, 1819 -> 1825): the reclassify
 * label/loop-back, TC_ACT_SHOT return and final return are missing
 * from this view -- reconcile with the full file before editing.
 */
1797 int tc_classify(struct sk_buff
*skb
, const struct tcf_proto
*tp
,
1798 struct tcf_result
*res
)
1801 #ifdef CONFIG_NET_CLS_ACT
/* keep the original chain head so a reclassify can restart from it */
1802 const struct tcf_proto
*otp
= tp
;
1806 err
= tc_classify_compat(skb
, tp
, res
);
1807 #ifdef CONFIG_NET_CLS_ACT
1808 if (err
== TC_ACT_RECLASSIFY
) {
1809 u32 verd
= G_TC_VERD(skb
->tc_verd
);
/* too many reclassify loops: log (ratelimited) and give up */
1812 if (verd
++ >= MAX_REC_LOOP
) {
1813 net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
1816 ntohs(tp
->protocol
));
1819 skb
->tc_verd
= SET_TC_VERD(skb
->tc_verd
, verd
);
1825 EXPORT_SYMBOL(tc_classify
);
/*
 * tcf_destroy() -- tear down a single classifier: ask its ops to
 * destroy internal state, then drop the owning module's refcount.
 * NOTE(review): extraction elides line(s) after 1830 -- upstream also
 * frees @tp here; verify against the full file.
 */
1827 void tcf_destroy(struct tcf_proto
*tp
)
1829 tp
->ops
->destroy(tp
);
1830 module_put(tp
->ops
->owner
);
/*
 * tcf_destroy_chain() -- destroy every classifier on the chain headed
 * at *fl, popping entries until the head is NULL.
 * NOTE(review): the loop body (original lines 1839-1842, presumably
 * unlinking the head and calling tcf_destroy()) is elided from this
 * view -- reconcile with the full file before editing.
 */
1834 void tcf_destroy_chain(struct tcf_proto
**fl
)
1836 struct tcf_proto
*tp
;
1838 while ((tp
= *fl
) != NULL
) {
1843 EXPORT_SYMBOL(tcf_destroy_chain
);
1845 #ifdef CONFIG_PROC_FS
/*
 * psched_show() -- seq_file body for /proc/net/psched: prints four
 * hex words describing the scheduler clock (ns per usec, ns per
 * psched tick, and values derived from the CLOCK_MONOTONIC hrtimer
 * resolution).
 * NOTE(review): token-shattered extraction with elided lines (original
 * numbering jumps, e.g. 1846 -> 1850): the timespec declaration, the
 * third printed word and the return are missing from this view --
 * reconcile with the full file before editing.
 */
1846 static int psched_show(struct seq_file
*seq
, void *v
)
1850 hrtimer_get_res(CLOCK_MONOTONIC
, &ts
);
1851 seq_printf(seq
, "%08x %08x %08x %08x\n",
1852 (u32
)NSEC_PER_USEC
, (u32
)PSCHED_TICKS2NS(1),
1854 (u32
)NSEC_PER_SEC
/(u32
)ktime_to_ns(timespec_to_ktime(ts
)));
1859 static int psched_open(struct inode
*inode
, struct file
*file
)
1861 return single_open(file
, psched_show
, NULL
);
/*
 * File operations for /proc/net/psched (single_open-based seq file).
 * NOTE(review): original line 1867 is elided here -- presumably the
 * .read = seq_read initializer; verify against the full file.
 */
1864 static const struct file_operations psched_fops
= {
1865 .owner
= THIS_MODULE
,
1866 .open
= psched_open
,
1868 .llseek
= seq_lseek
,
1869 .release
= single_release
,
/*
 * psched_net_init() -- per-netns setup: create the read-only
 * /proc/net/psched entry backed by psched_fops.
 * NOTE(review): the allocation-failure check and return statements
 * (original lines 1877-1880) are elided from this view -- reconcile
 * with the full file before editing.
 */
1872 static int __net_init
psched_net_init(struct net
*net
)
1874 struct proc_dir_entry
*e
;
1876 e
= proc_create("psched", 0, net
->proc_net
, &psched_fops
);
1883 static void __net_exit
psched_net_exit(struct net
*net
)
1885 remove_proc_entry("psched", net
->proc_net
);
/*
 * !CONFIG_PROC_FS stubs: no /proc/net/psched to create or remove.
 * NOTE(review): the stub bodies (original lines 1889-1891 and
 * 1894-1895 -- presumably "return 0;" and an empty body) are elided
 * from this view, as is the surrounding #else/#endif; reconcile with
 * the full file before editing.
 */
1888 static int __net_init
psched_net_init(struct net
*net
)
1893 static void __net_exit
psched_net_exit(struct net
*net
)
1898 static struct pernet_operations psched_net_ops
= {
1899 .init
= psched_net_init
,
1900 .exit
= psched_net_exit
,
/*
 * pktsched_init() -- subsystem initializer: registers the pernet
 * /proc hooks, the built-in qdiscs (pfifo_fast, pfifo, bfifo,
 * pfifo_head_drop, mq), and the rtnetlink handlers for qdisc and
 * traffic-class messages.
 * NOTE(review): token-shattered extraction with elided lines (original
 * numbering jumps, e.g. 1910 -> 1914, 1925 -> 1930): the error
 * return after register_pernet_subsys() failure and the final
 * "return 0;" are missing from this view -- reconcile with the full
 * file before editing.
 */
1903 static int __init
pktsched_init(void)
1907 err
= register_pernet_subsys(&psched_net_ops
);
1909 pr_err("pktsched_init: "
1910 "cannot initialize per netns operations\n");
1914 register_qdisc(&pfifo_fast_ops
);
1915 register_qdisc(&pfifo_qdisc_ops
);
1916 register_qdisc(&bfifo_qdisc_ops
);
1917 register_qdisc(&pfifo_head_drop_qdisc_ops
);
1918 register_qdisc(&mq_qdisc_ops
);
1920 rtnl_register(PF_UNSPEC
, RTM_NEWQDISC
, tc_modify_qdisc
, NULL
, NULL
);
1921 rtnl_register(PF_UNSPEC
, RTM_DELQDISC
, tc_get_qdisc
, NULL
, NULL
);
1922 rtnl_register(PF_UNSPEC
, RTM_GETQDISC
, tc_get_qdisc
, tc_dump_qdisc
, NULL
);
1923 rtnl_register(PF_UNSPEC
, RTM_NEWTCLASS
, tc_ctl_tclass
, NULL
, NULL
);
1924 rtnl_register(PF_UNSPEC
, RTM_DELTCLASS
, tc_ctl_tclass
, NULL
, NULL
);
1925 rtnl_register(PF_UNSPEC
, RTM_GETTCLASS
, tc_ctl_tclass
, tc_dump_tclass
, NULL
);
1930 subsys_initcall(pktsched_init
);