2 * net/sched/sch_mqprio.c
4 * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
11 #include <linux/types.h>
12 #include <linux/slab.h>
13 #include <linux/kernel.h>
14 #include <linux/string.h>
15 #include <linux/errno.h>
16 #include <linux/skbuff.h>
17 #include <linux/module.h>
18 #include <net/netlink.h>
19 #include <net/pkt_sched.h>
20 #include <net/sch_generic.h>
23 struct Qdisc
**qdiscs
;
27 static void mqprio_destroy(struct Qdisc
*sch
)
29 struct net_device
*dev
= qdisc_dev(sch
);
30 struct mqprio_sched
*priv
= qdisc_priv(sch
);
35 ntx
< dev
->num_tx_queues
&& priv
->qdiscs
[ntx
];
37 qdisc_destroy(priv
->qdiscs
[ntx
]);
41 if (priv
->hw_offload
&& dev
->netdev_ops
->ndo_setup_tc
) {
42 struct tc_mqprio_qopt offload
= { 0 };
43 struct tc_to_netdev tc
= { { .mqprio
= &offload
} };
45 dev
->netdev_ops
->ndo_setup_tc(dev
, TC_SETUP_MQPRIO
,
46 sch
->handle
, 0, 0, &tc
);
48 netdev_set_num_tc(dev
, 0);
52 static int mqprio_parse_opt(struct net_device
*dev
, struct tc_mqprio_qopt
*qopt
)
56 /* Verify num_tc is not out of max range */
57 if (qopt
->num_tc
> TC_MAX_QUEUE
)
60 /* Verify priority mapping uses valid tcs */
61 for (i
= 0; i
< TC_BITMASK
+ 1; i
++) {
62 if (qopt
->prio_tc_map
[i
] >= qopt
->num_tc
)
66 /* Limit qopt->hw to maximum supported offload value. Drivers have
67 * the option of overriding this later if they don't support it.
70 if (qopt
->hw
> TC_MQPRIO_HW_OFFLOAD_MAX
)
71 qopt
->hw
= TC_MQPRIO_HW_OFFLOAD_MAX
;
73 /* If hardware offload is requested we will leave it to the device
74 * to either populate the queue counts itself or to validate the
75 * provided queue counts. If ndo_setup_tc is not present then
76 * hardware doesn't support offload and we should return an error.
79 return dev
->netdev_ops
->ndo_setup_tc
? 0 : -EINVAL
;
81 for (i
= 0; i
< qopt
->num_tc
; i
++) {
82 unsigned int last
= qopt
->offset
[i
] + qopt
->count
[i
];
84 /* Verify the queue count is in tx range; being equal to the
85 * real_num_tx_queues indicates the last queue is in use.
87 if (qopt
->offset
[i
] >= dev
->real_num_tx_queues
||
89 last
> dev
->real_num_tx_queues
)
92 /* Verify that the offset and counts do not overlap */
93 for (j
= i
+ 1; j
< qopt
->num_tc
; j
++) {
94 if (last
> qopt
->offset
[j
])
102 static int mqprio_init(struct Qdisc
*sch
, struct nlattr
*opt
)
104 struct net_device
*dev
= qdisc_dev(sch
);
105 struct mqprio_sched
*priv
= qdisc_priv(sch
);
106 struct netdev_queue
*dev_queue
;
108 int i
, err
= -EOPNOTSUPP
;
109 struct tc_mqprio_qopt
*qopt
= NULL
;
111 BUILD_BUG_ON(TC_MAX_QUEUE
!= TC_QOPT_MAX_QUEUE
);
112 BUILD_BUG_ON(TC_BITMASK
!= TC_QOPT_BITMASK
);
114 if (sch
->parent
!= TC_H_ROOT
)
117 if (!netif_is_multiqueue(dev
))
120 if (!opt
|| nla_len(opt
) < sizeof(*qopt
))
123 qopt
= nla_data(opt
);
124 if (mqprio_parse_opt(dev
, qopt
))
127 /* pre-allocate qdisc, attachment can't fail */
128 priv
->qdiscs
= kcalloc(dev
->num_tx_queues
, sizeof(priv
->qdiscs
[0]),
133 for (i
= 0; i
< dev
->num_tx_queues
; i
++) {
134 dev_queue
= netdev_get_tx_queue(dev
, i
);
135 qdisc
= qdisc_create_dflt(dev_queue
,
136 get_default_qdisc_ops(dev
, i
),
137 TC_H_MAKE(TC_H_MAJ(sch
->handle
),
142 priv
->qdiscs
[i
] = qdisc
;
143 qdisc
->flags
|= TCQ_F_ONETXQUEUE
| TCQ_F_NOPARENT
;
146 /* If the mqprio options indicate that hardware should own
147 * the queue mapping then run ndo_setup_tc; otherwise use the
148 * supplied and verified mapping.
151 struct tc_mqprio_qopt offload
= *qopt
;
152 struct tc_to_netdev tc
= { { .mqprio
= &offload
} };
154 err
= dev
->netdev_ops
->ndo_setup_tc(dev
, TC_SETUP_MQPRIO
,
155 sch
->handle
, 0, 0, &tc
);
159 priv
->hw_offload
= offload
.hw
;
161 netdev_set_num_tc(dev
, qopt
->num_tc
);
162 for (i
= 0; i
< qopt
->num_tc
; i
++)
163 netdev_set_tc_queue(dev
, i
,
164 qopt
->count
[i
], qopt
->offset
[i
]);
167 /* Always use supplied priority mappings */
168 for (i
= 0; i
< TC_BITMASK
+ 1; i
++)
169 netdev_set_prio_tc_map(dev
, i
, qopt
->prio_tc_map
[i
]);
171 sch
->flags
|= TCQ_F_MQROOT
;
175 static void mqprio_attach(struct Qdisc
*sch
)
177 struct net_device
*dev
= qdisc_dev(sch
);
178 struct mqprio_sched
*priv
= qdisc_priv(sch
);
179 struct Qdisc
*qdisc
, *old
;
182 /* Attach underlying qdisc */
183 for (ntx
= 0; ntx
< dev
->num_tx_queues
; ntx
++) {
184 qdisc
= priv
->qdiscs
[ntx
];
185 old
= dev_graft_qdisc(qdisc
->dev_queue
, qdisc
);
188 if (ntx
< dev
->real_num_tx_queues
)
189 qdisc_hash_add(qdisc
, false);
195 static struct netdev_queue
*mqprio_queue_get(struct Qdisc
*sch
,
198 struct net_device
*dev
= qdisc_dev(sch
);
199 unsigned long ntx
= cl
- 1 - netdev_get_num_tc(dev
);
201 if (ntx
>= dev
->num_tx_queues
)
203 return netdev_get_tx_queue(dev
, ntx
);
206 static int mqprio_graft(struct Qdisc
*sch
, unsigned long cl
, struct Qdisc
*new,
209 struct net_device
*dev
= qdisc_dev(sch
);
210 struct netdev_queue
*dev_queue
= mqprio_queue_get(sch
, cl
);
215 if (dev
->flags
& IFF_UP
)
218 *old
= dev_graft_qdisc(dev_queue
, new);
221 new->flags
|= TCQ_F_ONETXQUEUE
| TCQ_F_NOPARENT
;
223 if (dev
->flags
& IFF_UP
)
229 static int mqprio_dump(struct Qdisc
*sch
, struct sk_buff
*skb
)
231 struct net_device
*dev
= qdisc_dev(sch
);
232 struct mqprio_sched
*priv
= qdisc_priv(sch
);
233 unsigned char *b
= skb_tail_pointer(skb
);
234 struct tc_mqprio_qopt opt
= { 0 };
239 memset(&sch
->bstats
, 0, sizeof(sch
->bstats
));
240 memset(&sch
->qstats
, 0, sizeof(sch
->qstats
));
242 for (i
= 0; i
< dev
->num_tx_queues
; i
++) {
243 qdisc
= rtnl_dereference(netdev_get_tx_queue(dev
, i
)->qdisc
);
244 spin_lock_bh(qdisc_lock(qdisc
));
245 sch
->q
.qlen
+= qdisc
->q
.qlen
;
246 sch
->bstats
.bytes
+= qdisc
->bstats
.bytes
;
247 sch
->bstats
.packets
+= qdisc
->bstats
.packets
;
248 sch
->qstats
.backlog
+= qdisc
->qstats
.backlog
;
249 sch
->qstats
.drops
+= qdisc
->qstats
.drops
;
250 sch
->qstats
.requeues
+= qdisc
->qstats
.requeues
;
251 sch
->qstats
.overlimits
+= qdisc
->qstats
.overlimits
;
252 spin_unlock_bh(qdisc_lock(qdisc
));
255 opt
.num_tc
= netdev_get_num_tc(dev
);
256 memcpy(opt
.prio_tc_map
, dev
->prio_tc_map
, sizeof(opt
.prio_tc_map
));
257 opt
.hw
= priv
->hw_offload
;
259 for (i
= 0; i
< netdev_get_num_tc(dev
); i
++) {
260 opt
.count
[i
] = dev
->tc_to_txq
[i
].count
;
261 opt
.offset
[i
] = dev
->tc_to_txq
[i
].offset
;
264 if (nla_put(skb
, TCA_OPTIONS
, sizeof(opt
), &opt
))
265 goto nla_put_failure
;
273 static struct Qdisc
*mqprio_leaf(struct Qdisc
*sch
, unsigned long cl
)
275 struct netdev_queue
*dev_queue
= mqprio_queue_get(sch
, cl
);
280 return dev_queue
->qdisc_sleeping
;
283 static unsigned long mqprio_get(struct Qdisc
*sch
, u32 classid
)
285 struct net_device
*dev
= qdisc_dev(sch
);
286 unsigned int ntx
= TC_H_MIN(classid
);
288 if (ntx
> dev
->num_tx_queues
+ netdev_get_num_tc(dev
))
293 static void mqprio_put(struct Qdisc
*sch
, unsigned long cl
)
297 static int mqprio_dump_class(struct Qdisc
*sch
, unsigned long cl
,
298 struct sk_buff
*skb
, struct tcmsg
*tcm
)
300 struct net_device
*dev
= qdisc_dev(sch
);
302 if (cl
<= netdev_get_num_tc(dev
)) {
303 tcm
->tcm_parent
= TC_H_ROOT
;
307 struct netdev_queue
*dev_queue
;
309 dev_queue
= mqprio_queue_get(sch
, cl
);
311 for (i
= 0; i
< netdev_get_num_tc(dev
); i
++) {
312 struct netdev_tc_txq tc
= dev
->tc_to_txq
[i
];
313 int q_idx
= cl
- netdev_get_num_tc(dev
);
315 if (q_idx
> tc
.offset
&&
316 q_idx
<= tc
.offset
+ tc
.count
) {
318 TC_H_MAKE(TC_H_MAJ(sch
->handle
),
323 tcm
->tcm_info
= dev_queue
->qdisc_sleeping
->handle
;
325 tcm
->tcm_handle
|= TC_H_MIN(cl
);
329 static int mqprio_dump_class_stats(struct Qdisc
*sch
, unsigned long cl
,
334 struct net_device
*dev
= qdisc_dev(sch
);
336 if (cl
<= netdev_get_num_tc(dev
)) {
340 struct gnet_stats_queue qstats
= {0};
341 struct gnet_stats_basic_packed bstats
= {0};
342 struct netdev_tc_txq tc
= dev
->tc_to_txq
[cl
- 1];
344 /* Drop lock here; it will be reclaimed before touching
345 * statistics. This is required because the d->lock we
346 * hold here is the lock on dev_queue->qdisc_sleeping,
347 * also acquired below.
350 spin_unlock_bh(d
->lock
);
352 for (i
= tc
.offset
; i
< tc
.offset
+ tc
.count
; i
++) {
353 struct netdev_queue
*q
= netdev_get_tx_queue(dev
, i
);
355 qdisc
= rtnl_dereference(q
->qdisc
);
356 spin_lock_bh(qdisc_lock(qdisc
));
357 qlen
+= qdisc
->q
.qlen
;
358 bstats
.bytes
+= qdisc
->bstats
.bytes
;
359 bstats
.packets
+= qdisc
->bstats
.packets
;
360 qstats
.backlog
+= qdisc
->qstats
.backlog
;
361 qstats
.drops
+= qdisc
->qstats
.drops
;
362 qstats
.requeues
+= qdisc
->qstats
.requeues
;
363 qstats
.overlimits
+= qdisc
->qstats
.overlimits
;
364 spin_unlock_bh(qdisc_lock(qdisc
));
366 /* Reclaim root sleeping lock before completing stats */
368 spin_lock_bh(d
->lock
);
369 if (gnet_stats_copy_basic(NULL
, d
, NULL
, &bstats
) < 0 ||
370 gnet_stats_copy_queue(d
, NULL
, &qstats
, qlen
) < 0)
373 struct netdev_queue
*dev_queue
= mqprio_queue_get(sch
, cl
);
375 sch
= dev_queue
->qdisc_sleeping
;
376 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch
),
377 d
, NULL
, &sch
->bstats
) < 0 ||
378 gnet_stats_copy_queue(d
, NULL
,
379 &sch
->qstats
, sch
->q
.qlen
) < 0)
385 static void mqprio_walk(struct Qdisc
*sch
, struct qdisc_walker
*arg
)
387 struct net_device
*dev
= qdisc_dev(sch
);
393 /* Walk hierarchy with a virtual class per tc */
394 arg
->count
= arg
->skip
;
395 for (ntx
= arg
->skip
;
396 ntx
< dev
->num_tx_queues
+ netdev_get_num_tc(dev
);
398 if (arg
->fn(sch
, ntx
+ 1, arg
) < 0) {
406 static const struct Qdisc_class_ops mqprio_class_ops
= {
407 .graft
= mqprio_graft
,
412 .dump
= mqprio_dump_class
,
413 .dump_stats
= mqprio_dump_class_stats
,
416 static struct Qdisc_ops mqprio_qdisc_ops __read_mostly
= {
417 .cl_ops
= &mqprio_class_ops
,
419 .priv_size
= sizeof(struct mqprio_sched
),
421 .destroy
= mqprio_destroy
,
422 .attach
= mqprio_attach
,
424 .owner
= THIS_MODULE
,
427 static int __init
mqprio_module_init(void)
429 return register_qdisc(&mqprio_qdisc_ops
);
432 static void __exit
mqprio_module_exit(void)
434 unregister_qdisc(&mqprio_qdisc_ops
);
437 module_init(mqprio_module_init
);
438 module_exit(mqprio_module_exit
);
440 MODULE_LICENSE("GPL");