/*
 * net/sched/sch_tbf.c  Token Bucket Filter queue.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *                                               original idea by Martin Devera
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>

/*      Simple Token Bucket Filter.
        =======================================

        SOURCE.
        -------

        None.

        Description.
        ------------

        A data flow obeys TBF with rate R and depth B, if for any
        time interval t_i...t_f the number of transmitted bits
        does not exceed B + R*(t_f-t_i).

        Packetized version of this definition:
        The sequence of packets of sizes s_i served at moments t_i
        obeys TBF, if for any i<=k:

        s_i+....+s_k <= B + R*(t_k - t_i)

        Algorithm.
        ----------

        Let N(t_i) be B/R initially and N(t) grow continuously with time as:

        N(t+delta) = min{B/R, N(t) + delta}

        If the first packet in queue has length S, it may be
        transmitted only at the time t_* when S/R <= N(t_*),
        and in this case N(t) jumps:

        N(t_* + 0) = N(t_* - 0) - S/R.


        In practice, QoS requires two TBFs to be applied to a data stream.
        One of them controls the steady-state burst size, while the other,
        with rate P (peak rate) and depth M (equal to the link MTU),
        limits bursts on a smaller time scale.

        It is easy to see that P > R and B > M.  If P is infinity, this
        double TBF is equivalent to a single one.

        When TBF works in reshaping mode, latency is estimated as:

        lat = max((L-B)/R, (L-M)/P)


        NOTES.
        ------

        If TBF throttles, it starts a watchdog timer, which will wake it up
        when it is ready to transmit.
        Note that the minimal timer resolution is 1/HZ.
        If no new packets arrive during this period,
        or if the device is not awakened by EOI for some previous packet,
        TBF can stop its activity for 1/HZ.


        This means that, with depth B, the maximal rate is

        R_crit = B*HZ

        E.g. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

        Note that the peak rate TBF is much tougher: with MTU 1500
        P_crit = 150Kbytes/sec.  So, if you need greater peak
        rates, use an Alpha with HZ=1000 :-)

        With classful TBF, limit is kept only for backwards compatibility.
        It is passed to the default bfifo qdisc; if the inner qdisc is
        changed, the limit is no longer effective.
*/
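
/* An illustrative worked example of the latency estimate above (the
 * numbers are hypothetical, not taken from this file): take
 * R = 125000 bytes/s (1 Mbit/s), B = 10000 bytes, P = 1250000 bytes/s,
 * M = 1500 bytes and a backlog limit of L = 100000 bytes.  Then
 *
 *      lat = max((100000 - 10000)/125000, (100000 - 1500)/1250000)
 *          = max(0.72 s, 0.0788 s)
 *          = 0.72 s
 *
 * i.e. the long-term bucket, not the peak-rate bucket, dominates the
 * worst-case delay.
 */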

struct tbf_sched_data {
/* Parameters */
        u32             limit;          /* Maximal length of backlog: bytes */
        s64             buffer;         /* Token bucket depth/rate: MUST BE >= MTU/B */
        s64             mtu;
        u32             max_size;
        struct psched_ratecfg rate;
        struct psched_ratecfg peak;
        bool peak_present;

/* Variables */
        s64             tokens;         /* Current number of B tokens */
        s64             ptokens;        /* Current number of P tokens */
        s64             t_c;            /* Time check-point */
        struct Qdisc    *qdisc;         /* Inner qdisc, default - bfifo queue */
        struct qdisc_watchdog watchdog; /* Watchdog timer */
};

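/* For orientation only: the fields above correspond, roughly, to the
 * tc(8) parameters of this qdisc.  A hedged example invocation (device
 * name and figures are made up for illustration, not taken from this
 * file):
 *
 *      tc qdisc add dev eth0 root tbf rate 1mbit burst 10kb limit 15kb
 *
 * "rate" ends up in q->rate, "burst" in q->buffer, "limit" is passed to
 * the inner bfifo qdisc, and the optional "peakrate"/"mtu" pair fills
 * q->peak and q->mtu.
 */
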
/* GSO packet is too big, segment it so that tbf can transmit
 * each segment in time
 */
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *segs, *nskb;
        netdev_features_t features = netif_skb_features(skb);
        int ret, nb;

        segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

        if (IS_ERR_OR_NULL(segs))
                return qdisc_reshape_fail(skb, sch);

        nb = 0;
        while (segs) {
                nskb = segs->next;
                segs->next = NULL;
                if (likely(segs->len <= q->max_size)) {
                        qdisc_skb_cb(segs)->pkt_len = segs->len;
                        ret = qdisc_enqueue(segs, q->qdisc);
                } else {
                        ret = qdisc_reshape_fail(skb, sch);
                }
                if (ret != NET_XMIT_SUCCESS) {
                        if (net_xmit_drop_count(ret))
                                sch->qstats.drops++;
                } else {
                        nb++;
                }
                segs = nskb;
        }
        sch->q.qlen += nb;
        if (nb > 1)
                qdisc_tree_decrease_qlen(sch, 1 - nb);
        consume_skb(skb);
        return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}

static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        int ret;

        if (qdisc_pkt_len(skb) > q->max_size) {
                if (skb_is_gso(skb))
                        return tbf_segment(skb, sch);
                return qdisc_reshape_fail(skb, sch);
        }
        ret = qdisc_enqueue(skb, q->qdisc);
        if (ret != NET_XMIT_SUCCESS) {
                if (net_xmit_drop_count(ret))
                        sch->qstats.drops++;
                return ret;
        }

        sch->q.qlen++;
        return NET_XMIT_SUCCESS;
}

static unsigned int tbf_drop(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        unsigned int len = 0;

        if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
                sch->q.qlen--;
                sch->qstats.drops++;
        }
        return len;
}

static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        skb = q->qdisc->ops->peek(q->qdisc);

        if (skb) {
                s64 now;
                s64 toks;
                s64 ptoks = 0;
                unsigned int len = qdisc_pkt_len(skb);

                now = ktime_to_ns(ktime_get());
                toks = min_t(s64, now - q->t_c, q->buffer);

                if (q->peak_present) {
                        ptoks = toks + q->ptokens;
                        if (ptoks > q->mtu)
                                ptoks = q->mtu;
                        ptoks -= (s64) psched_l2t_ns(&q->peak, len);
                }
                toks += q->tokens;
                if (toks > q->buffer)
                        toks = q->buffer;
                toks -= (s64) psched_l2t_ns(&q->rate, len);

                if ((toks|ptoks) >= 0) {
                        /* Both token counters are non-negative: transmit now */
                        skb = qdisc_dequeue_peeked(q->qdisc);
                        if (unlikely(!skb))
                                return NULL;

                        q->t_c = now;
                        q->tokens = toks;
                        q->ptokens = ptoks;
                        sch->q.qlen--;
                        qdisc_unthrottled(sch);
                        qdisc_bstats_update(sch, skb);
                        return skb;
                }

                qdisc_watchdog_schedule_ns(&q->watchdog,
                                           now + max_t(long, -toks, -ptoks));

                /* Maybe we have a shorter packet in the queue,
                   which can be sent now.  It sounds cool,
                   but this is wrong in principle.
                   We MUST NOT reorder packets under these circumstances.

                   Really, if we split the flow into independent
                   subflows, it would be a very good solution.
                   This is the main idea of all FQ algorithms
                   (cf. CSZ, HPFQ, HFSC)
                 */

                sch->qstats.overlimits++;
        }
        return NULL;
}

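/* A hedged numeric illustration of the token bookkeeping above (the
 * figures are invented for this example, not taken from the code):
 * with rate 125000 bytes/s every byte costs 8000 ns, so a 1500 byte
 * packet costs psched_l2t_ns(&q->rate, 1500) = 12,000,000 ns.  If
 * q->buffer is 80,000,000 ns (a 10000 byte bucket), q->tokens is
 * 5,000,000 ns and 10,000,000 ns have elapsed since q->t_c, then
 *
 *      toks = min(10,000,000, 80,000,000) + 5,000,000 - 12,000,000
 *           = 3,000,000 >= 0
 *
 * so the packet is sent and q->tokens becomes 3,000,000 ns; a negative
 * result would instead arm the watchdog for -toks nanoseconds.
 */
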
static void tbf_reset(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->q.qlen = 0;
        q->t_c = ktime_to_ns(ktime_get());
        q->tokens = q->buffer;
        q->ptokens = q->mtu;
        qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
        [TCA_TBF_PARMS]         = { .len = sizeof(struct tc_tbf_qopt) },
        [TCA_TBF_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_TBF_PTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_TBF_RATE64]        = { .type = NLA_U64 },
        [TCA_TBF_PRATE64]       = { .type = NLA_U64 },
};

static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
        int err;
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_TBF_MAX + 1];
        struct tc_tbf_qopt *qopt;
        struct qdisc_rate_table *rtab = NULL;
        struct qdisc_rate_table *ptab = NULL;
        struct Qdisc *child = NULL;
        int max_size, n;
        u64 rate64 = 0, prate64 = 0;

        err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
        if (err < 0)
                return err;

        err = -EINVAL;
        if (tb[TCA_TBF_PARMS] == NULL)
                goto done;

        qopt = nla_data(tb[TCA_TBF_PARMS]);
        rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
        if (rtab == NULL)
                goto done;

        if (qopt->peakrate.rate) {
                if (qopt->peakrate.rate > qopt->rate.rate)
                        ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
                if (ptab == NULL)
                        goto done;
        }

        /* Largest packet size whose transmission time still fits in the bucket */
        for (n = 0; n < 256; n++)
                if (rtab->data[n] > qopt->buffer)
                        break;
        max_size = (n << qopt->rate.cell_log) - 1;
        if (ptab) {
                int size;

                for (n = 0; n < 256; n++)
                        if (ptab->data[n] > qopt->mtu)
                                break;
                size = (n << qopt->peakrate.cell_log) - 1;
                if (size < max_size)
                        max_size = size;
        }
        if (max_size < 0)
                goto done;

        if (q->qdisc != &noop_qdisc) {
                err = fifo_set_limit(q->qdisc, qopt->limit);
                if (err)
                        goto done;
        } else if (qopt->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
                if (IS_ERR(child)) {
                        err = PTR_ERR(child);
                        goto done;
                }
        }

        sch_tree_lock(sch);
        if (child) {
                qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
                qdisc_destroy(q->qdisc);
                q->qdisc = child;
        }
        q->limit = qopt->limit;
        q->mtu = PSCHED_TICKS2NS(qopt->mtu);
        q->max_size = max_size;
        q->buffer = PSCHED_TICKS2NS(qopt->buffer);
        q->tokens = q->buffer;
        q->ptokens = q->mtu;

        if (tb[TCA_TBF_RATE64])
                rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
        psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64);
        if (ptab) {
                if (tb[TCA_TBF_PRATE64])
                        prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
                psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64);
                q->peak_present = true;
        } else {
                q->peak_present = false;
        }

        sch_tree_unlock(sch);
        err = 0;
done:
        if (rtab)
                qdisc_put_rtab(rtab);
        if (ptab)
                qdisc_put_rtab(ptab);
        return err;
}

static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        if (opt == NULL)
                return -EINVAL;

        q->t_c = ktime_to_ns(ktime_get());
        qdisc_watchdog_init(&q->watchdog, sch);
        q->qdisc = &noop_qdisc;

        return tbf_change(sch, opt);
}

static void tbf_destroy(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_watchdog_cancel(&q->watchdog);
        qdisc_destroy(q->qdisc);
}

static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct nlattr *nest;
        struct tc_tbf_qopt opt;

        sch->qstats.backlog = q->qdisc->qstats.backlog;
        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        opt.limit = q->limit;
        psched_ratecfg_getrate(&opt.rate, &q->rate);
        if (q->peak_present)
                psched_ratecfg_getrate(&opt.peakrate, &q->peak);
        else
                memset(&opt.peakrate, 0, sizeof(opt.peakrate));
        opt.mtu = PSCHED_NS2TICKS(q->mtu);
        opt.buffer = PSCHED_NS2TICKS(q->buffer);
        if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;
        if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
            nla_put_u64(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps))
                goto nla_put_failure;
        if (q->peak_present &&
            q->peak.rate_bytes_ps >= (1ULL << 32) &&
            nla_put_u64(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps))
                goto nla_put_failure;

        nla_nest_end(skb, nest);
        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;

        return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        sch_tree_lock(sch);
        *old = q->qdisc;
        q->qdisc = new;
        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
        qdisc_reset(*old);
        sch_tree_unlock(sch);

        return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops tbf_class_ops = {
        .graft          =       tbf_graft,
        .leaf           =       tbf_leaf,
        .get            =       tbf_get,
        .put            =       tbf_put,
        .walk           =       tbf_walk,
        .dump           =       tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
        .next           =       NULL,
        .cl_ops         =       &tbf_class_ops,
        .id             =       "tbf",
        .priv_size      =       sizeof(struct tbf_sched_data),
        .enqueue        =       tbf_enqueue,
        .dequeue        =       tbf_dequeue,
        .peek           =       qdisc_peek_dequeued,
        .drop           =       tbf_drop,
        .init           =       tbf_init,
        .reset          =       tbf_reset,
        .destroy        =       tbf_destroy,
        .change         =       tbf_change,
        .dump           =       tbf_dump,
        .owner          =       THIS_MODULE,
};

static int __init tbf_module_init(void)
{
        return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
        unregister_qdisc(&tbf_qdisc_ops);
}
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");