/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:	ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size).
	Really, this limit will never be reached
	if RED works correctly.
 */

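/*
 * Illustrative sizing (not part of the original file): with qth_min of
 * 30000 bytes, qth_max of 90000 bytes and a burst allowance of roughly
 * 55 average-sized packets of 1000 bytes, a limit of 400000 bytes
 * comfortably exceeds qth_max plus the expected burst, so the hard
 * limit is only hit if RED itself misbehaves.  These values map to the
 * tc 'red' parameters limit/min/max/burst/avpkt.
 */
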
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;
	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

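/*
 * Enqueue: the average queue length is computed from the child qdisc's
 * byte backlog and red_action() decides whether to pass the packet,
 * probabilistically mark/drop it, or hard mark/drop it.  With ECN
 * enabled the packet is CE-marked instead of dropped, unless marking
 * fails or the harddrop flag forces a drop at the hard limit.
 * Accepted packets are queued to the child (a bfifo by default).
 */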
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

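/*
 * Dequeue from the child qdisc; when the child runs empty, enter the
 * idle period so the average queue length decays while the link idles.
 */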
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

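/*
 * Push the RED configuration down to the device via ndo_setup_tc()
 * (TC_SETUP_QDISC_RED).  TCQ_F_OFFLOADED is set only when the driver
 * accepts TC_RED_REPLACE and is cleared otherwise; the min/max
 * thresholds are passed in bytes (shifted back by Wlog).
 */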
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
	} else {
		opt.command = TC_RED_DESTROY;
	}

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);

	if (!err && enable)
		sch->flags |= TCQ_F_OFFLOADED;
	else
		sch->flags &= ~TCQ_F_OFFLOADED;

	return err;
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_destroy(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

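/*
 * Parse the netlink request, (re)create the bfifo child that enforces
 * the hard byte limit, install the new RED parameters under the tree
 * lock, arm the adaptative timer if requested and finally try to
 * offload the configuration to the hardware.
 */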
static int red_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
		if (IS_ERR(child))
			return PTR_ERR(child);
	}

	/* child is NULL when ctl->limit is 0; only hash a newly created fifo */
	if (child)
		qdisc_hash_add(child, true);
	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	red_offload(sch, true);
	return 0;
}

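/*
 * Periodic worker for adaptative RED: re-tunes max_P every 500 ms under
 * the root qdisc lock (see red_adaptative_algo()).
 */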
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

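/*
 * Initial setup: start with the noop qdisc as child and let
 * red_change() apply the user-supplied configuration.
 */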
static int red_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt);
}

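/*
 * When the qdisc is offloaded, ask the driver (TC_RED_STATS) to update
 * the qdisc's bstats/qstats in place before they are dumped.
 */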
static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	if (!(sch->flags & TCQ_F_OFFLOADED))
		return 0;

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					     &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

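/*
 * Fold the hardware counters (TC_RED_XSTATS) into the software RED
 * xstats when the qdisc is offloaded, so userspace sees combined totals.
 */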
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
	};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct red_stats hw_stats = {0};
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &hw_stats,
			},
		};
		if (!dev->netdev_ops->ndo_setup_tc(dev,
						   TC_SETUP_QDISC_RED,
						   &hw_stats_request)) {
			st.early += hw_stats.prob_drop + hw_stats.forced_drop;
			st.pdrop += hw_stats.pdrop;
			st.other += hw_stats.other;
			st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
		}
	}

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

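/*
 * RED is classful with a single class (minor 1) so that the child qdisc
 * can be grafted and dumped; red_find() always returns the same cookie.
 */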
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");