/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size).
	Really, this limit will never be reached
	if RED works correctly.
 */

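/* Example (illustrative): from userspace this qdisc is usually configured
 * with something like
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 probability 0.02 bandwidth 10Mbit ecn adaptive
 *
 * where tc derives the fixed-point Wlog/Plog/Scell_log values and the
 * TCA_RED_STAB lookup table from these human-readable numbers before they
 * reach red_change() below.  The numbers are arbitrary; see tc-red(8) for
 * the authoritative syntax.
 */
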
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;
	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

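/* Enqueue: update the average queue length (an EWMA of the child backlog)
 * and let red_action() pick one of three outcomes: below qth_min the packet
 * passes through, between qth_min and qth_max it is probabilistically marked
 * (ECN) or dropped, and above qth_max it is marked or dropped unconditionally.
 * Packets that survive are handed to the child (bfifo) qdisc.
 */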
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

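/* Push the current RED configuration down to (or remove it from) hardware
 * that implements ndo_setup_tc with TC_SETUP_QDISC_RED.  Devices that cannot
 * offload simply return -EOPNOTSUPP and the qdisc keeps running in software.
 */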
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_destroy(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

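/* Parse and apply a new configuration.  TCA_RED_PARMS carries the fixed-point
 * thresholds (struct tc_red_qopt), TCA_RED_STAB the idle-time lookup table,
 * and TCA_RED_MAX_P an optional high-resolution maximum marking probability.
 * When a limit is given, a bfifo child is (re)created to enforce it in bytes.
 */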
static int red_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	red_offload(sch, true);
	return 0;
}

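/* Adaptive RED: when TC_RED_ADAPTATIVE is set, this timer fires every 500 ms
 * and lets red_adaptative_algo() nudge max_P up or down so that the average
 * queue length settles between qth_min and qth_max.
 */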
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt);
}

static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					    &hw_stats);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
	};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct red_stats hw_stats = {0};
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &hw_stats,
			},
		};
		if (!dev->netdev_ops->ndo_setup_tc(dev,
						   TC_SETUP_QDISC_RED,
						   &hw_stats_request)) {
			st.early += hw_stats.prob_drop + hw_stats.forced_drop;
			st.pdrop += hw_stats.pdrop;
			st.other += hw_stats.other;
			st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
		}
	}

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

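/* RED exposes a single, fixed pseudo-class (minor 1) that wraps the child
 * qdisc, so the class ops below only allow grafting a different leaf under
 * it and reporting the child's handle.
 */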
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");