]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/sched/sch_choke.c
net: rcu-ify tcf_proto
[mirror_ubuntu-artful-kernel.git] / net / sched / sch_choke.c
CommitLineData
45e14433 1/*
2 * net/sched/sch_choke.c CHOKE scheduler
3 *
4 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/skbuff.h>
cdfb74d4 17#include <linux/vmalloc.h>
45e14433 18#include <net/pkt_sched.h>
19#include <net/inet_ecn.h>
20#include <net/red.h>
2bcc34bb 21#include <net/flow_keys.h>
45e14433 22
23/*
24 CHOKe stateless AQM for fair bandwidth allocation
25 =================================================
26
27 CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
28 unresponsive flows) is a variant of RED that penalizes misbehaving flows but
29 maintains no flow state. The difference from RED is an additional step
30 during the enqueuing process. If average queue size is over the
31 low threshold (qmin), a packet is chosen at random from the queue.
32 If both the new and chosen packet are from the same flow, both
33 are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
34 needs to access packets in queue randomly. It has a minimal class
35 interface to allow overriding the builtin flow classifier with
36 filters.
37
38 Source:
39 R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
40 Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
41 IEEE INFOCOM, 2000.
42
43 A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
44 Characteristics", IEEE/ACM Transactions on Networking, 2004
45
46 */
47
/*
 * Largest packet limit accepted by choke_change().  The table is sized to
 * roundup_pow_of_two(limit + 1), so this caps it at 128K slots.
 */
#define CHOKE_MAX_QUEUE	((128 * 1024) - 1)
50
51struct choke_sched_data {
52/* Parameters */
53 u32 limit;
54 unsigned char flags;
55
56 struct red_parms parms;
57
58/* Variables */
eeca6688 59 struct red_vars vars;
25d8c0d5 60 struct tcf_proto __rcu *filter_list;
45e14433 61 struct {
62 u32 prob_drop; /* Early probability drops */
63 u32 prob_mark; /* Early probability marks */
64 u32 forced_drop; /* Forced drops, qavg > max_thresh */
65 u32 forced_mark; /* Forced marks, qavg > max_thresh */
66 u32 pdrop; /* Drops due to queue limits */
67 u32 other; /* Drops due to drop() calls */
68 u32 matched; /* Drops to flow match */
69 } stats;
70
71 unsigned int head;
72 unsigned int tail;
73
74 unsigned int tab_mask; /* size - 1 */
75
76 struct sk_buff **tab;
77};
78
45e14433 79/* number of elements in queue including holes */
80static unsigned int choke_len(const struct choke_sched_data *q)
81{
82 return (q->tail - q->head) & q->tab_mask;
83}
84
85/* Is ECN parameter configured */
86static int use_ecn(const struct choke_sched_data *q)
87{
88 return q->flags & TC_RED_ECN;
89}
90
91/* Should packets over max just be dropped (versus marked) */
92static int use_harddrop(const struct choke_sched_data *q)
93{
94 return q->flags & TC_RED_HARDDROP;
95}
96
97/* Move head pointer forward to skip over holes */
98static void choke_zap_head_holes(struct choke_sched_data *q)
99{
100 do {
101 q->head = (q->head + 1) & q->tab_mask;
102 if (q->head == q->tail)
103 break;
104 } while (q->tab[q->head] == NULL);
105}
106
107/* Move tail pointer backwards to reuse holes */
108static void choke_zap_tail_holes(struct choke_sched_data *q)
109{
110 do {
111 q->tail = (q->tail - 1) & q->tab_mask;
112 if (q->head == q->tail)
113 break;
114 } while (q->tab[q->tail] == NULL);
115}
116
117/* Drop packet from queue array by creating a "hole" */
118static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
119{
120 struct choke_sched_data *q = qdisc_priv(sch);
121 struct sk_buff *skb = q->tab[idx];
122
123 q->tab[idx] = NULL;
124
125 if (idx == q->head)
126 choke_zap_head_holes(q);
127 if (idx == q->tail)
128 choke_zap_tail_holes(q);
129
130 sch->qstats.backlog -= qdisc_pkt_len(skb);
131 qdisc_drop(skb, sch);
132 qdisc_tree_decrease_qlen(sch, 1);
133 --sch->q.qlen;
134}
135
26f70e12 136struct choke_skb_cb {
2bcc34bb
ED
137 u16 classid;
138 u8 keys_valid;
139 struct flow_keys keys;
26f70e12
ED
140};
141
142static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
143{
16bda13d 144 qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
26f70e12
ED
145 return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
146}
147
45e14433 148static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
149{
26f70e12 150 choke_skb_cb(skb)->classid = classid;
45e14433 151}
152
153static u16 choke_get_classid(const struct sk_buff *skb)
154{
26f70e12 155 return choke_skb_cb(skb)->classid;
45e14433 156}
157
2bcc34bb
ED
158/*
159 * Compare flow of two packets
160 * Returns true only if source and destination address and port match.
161 * false for special cases
162 */
163static bool choke_match_flow(struct sk_buff *skb1,
164 struct sk_buff *skb2)
165{
166 if (skb1->protocol != skb2->protocol)
167 return false;
168
169 if (!choke_skb_cb(skb1)->keys_valid) {
170 choke_skb_cb(skb1)->keys_valid = 1;
171 skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys);
172 }
173
174 if (!choke_skb_cb(skb2)->keys_valid) {
175 choke_skb_cb(skb2)->keys_valid = 1;
176 skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys);
177 }
178
179 return !memcmp(&choke_skb_cb(skb1)->keys,
180 &choke_skb_cb(skb2)->keys,
181 sizeof(struct flow_keys));
182}
183
45e14433 184/*
185 * Classify flow using either:
186 * 1. pre-existing classification result in skb
187 * 2. fast internal classification
188 * 3. use TC filter based classification
189 */
190static bool choke_classify(struct sk_buff *skb,
191 struct Qdisc *sch, int *qerr)
192
193{
194 struct choke_sched_data *q = qdisc_priv(sch);
195 struct tcf_result res;
25d8c0d5 196 struct tcf_proto *fl;
45e14433 197 int result;
198
25d8c0d5
JF
199 fl = rcu_dereference_bh(q->filter_list);
200 result = tc_classify(skb, fl, &res);
45e14433 201 if (result >= 0) {
202#ifdef CONFIG_NET_CLS_ACT
203 switch (result) {
204 case TC_ACT_STOLEN:
205 case TC_ACT_QUEUED:
206 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
207 case TC_ACT_SHOT:
208 return false;
209 }
210#endif
211 choke_set_classid(skb, TC_H_MIN(res.classid));
212 return true;
213 }
214
215 return false;
216}
217
218/*
219 * Select a packet at random from queue
220 * HACK: since queue can have holes from previous deletion; retry several
221 * times to find a random skb but then just give up and return the head
222 * Will return NULL if queue is empty (q->head == q->tail)
223 */
224static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
225 unsigned int *pidx)
226{
227 struct sk_buff *skb;
228 int retrys = 3;
229
230 do {
f337db64 231 *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
45e14433 232 skb = q->tab[*pidx];
233 if (skb)
234 return skb;
235 } while (--retrys > 0);
236
237 return q->tab[*pidx = q->head];
238}
239
240/*
241 * Compare new packet with random packet in queue
242 * returns true if matched and sets *pidx
243 */
244static bool choke_match_random(const struct choke_sched_data *q,
245 struct sk_buff *nskb,
246 unsigned int *pidx)
247{
248 struct sk_buff *oskb;
249
250 if (q->head == q->tail)
251 return false;
252
253 oskb = choke_peek_random(q, pidx);
25d8c0d5 254 if (rcu_access_pointer(q->filter_list))
45e14433 255 return choke_get_classid(nskb) == choke_get_classid(oskb);
256
257 return choke_match_flow(oskb, nskb);
258}
259
260static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
261{
25d8c0d5 262 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
45e14433 263 struct choke_sched_data *q = qdisc_priv(sch);
eeca6688 264 const struct red_parms *p = &q->parms;
45e14433 265
25d8c0d5 266 if (rcu_access_pointer(q->filter_list)) {
45e14433 267 /* If using external classifiers, get result and record it. */
268 if (!choke_classify(skb, sch, &ret))
269 goto other_drop; /* Packet was eaten by filter */
270 }
271
2bcc34bb 272 choke_skb_cb(skb)->keys_valid = 0;
45e14433 273 /* Compute average queue usage (see RED) */
eeca6688
ED
274 q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
275 if (red_is_idling(&q->vars))
276 red_end_of_idle_period(&q->vars);
45e14433 277
278 /* Is queue small? */
eeca6688
ED
279 if (q->vars.qavg <= p->qth_min)
280 q->vars.qcount = -1;
45e14433 281 else {
282 unsigned int idx;
283
284 /* Draw a packet at random from queue and compare flow */
285 if (choke_match_random(q, skb, &idx)) {
286 q->stats.matched++;
287 choke_drop_by_idx(sch, idx);
288 goto congestion_drop;
289 }
290
291 /* Queue is large, always mark/drop */
eeca6688
ED
292 if (q->vars.qavg > p->qth_max) {
293 q->vars.qcount = -1;
45e14433 294
295 sch->qstats.overlimits++;
296 if (use_harddrop(q) || !use_ecn(q) ||
297 !INET_ECN_set_ce(skb)) {
298 q->stats.forced_drop++;
299 goto congestion_drop;
300 }
301
302 q->stats.forced_mark++;
eeca6688
ED
303 } else if (++q->vars.qcount) {
304 if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
305 q->vars.qcount = 0;
306 q->vars.qR = red_random(p);
45e14433 307
308 sch->qstats.overlimits++;
309 if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
310 q->stats.prob_drop++;
311 goto congestion_drop;
312 }
313
314 q->stats.prob_mark++;
315 }
316 } else
eeca6688 317 q->vars.qR = red_random(p);
45e14433 318 }
319
320 /* Admit new packet */
321 if (sch->q.qlen < q->limit) {
322 q->tab[q->tail] = skb;
323 q->tail = (q->tail + 1) & q->tab_mask;
324 ++sch->q.qlen;
325 sch->qstats.backlog += qdisc_pkt_len(skb);
326 return NET_XMIT_SUCCESS;
327 }
328
329 q->stats.pdrop++;
17045755 330 return qdisc_drop(skb, sch);
45e14433 331
17045755 332congestion_drop:
45e14433 333 qdisc_drop(skb, sch);
334 return NET_XMIT_CN;
335
17045755 336other_drop:
45e14433 337 if (ret & __NET_XMIT_BYPASS)
338 sch->qstats.drops++;
339 kfree_skb(skb);
340 return ret;
341}
342
343static struct sk_buff *choke_dequeue(struct Qdisc *sch)
344{
345 struct choke_sched_data *q = qdisc_priv(sch);
346 struct sk_buff *skb;
347
348 if (q->head == q->tail) {
eeca6688
ED
349 if (!red_is_idling(&q->vars))
350 red_start_of_idle_period(&q->vars);
45e14433 351 return NULL;
352 }
353
354 skb = q->tab[q->head];
355 q->tab[q->head] = NULL;
356 choke_zap_head_holes(q);
357 --sch->q.qlen;
358 sch->qstats.backlog -= qdisc_pkt_len(skb);
359 qdisc_bstats_update(sch, skb);
360
361 return skb;
362}
363
364static unsigned int choke_drop(struct Qdisc *sch)
365{
366 struct choke_sched_data *q = qdisc_priv(sch);
367 unsigned int len;
368
369 len = qdisc_queue_drop(sch);
370 if (len > 0)
371 q->stats.other++;
372 else {
eeca6688
ED
373 if (!red_is_idling(&q->vars))
374 red_start_of_idle_period(&q->vars);
45e14433 375 }
376
377 return len;
378}
379
380static void choke_reset(struct Qdisc *sch)
381{
382 struct choke_sched_data *q = qdisc_priv(sch);
383
eeca6688 384 red_restart(&q->vars);
45e14433 385}
386
387static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
388 [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
389 [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
a73ed26b 390 [TCA_CHOKE_MAX_P] = { .type = NLA_U32 },
45e14433 391};
392
393
/*
 * Release a packet table.  choke_change() obtains tables from kcalloc() or,
 * failing that, vzalloc(); kvfree() is used to release either kind.
 */
static void choke_free(void *addr)
{
	kvfree(addr);
}
398
399static int choke_change(struct Qdisc *sch, struct nlattr *opt)
400{
401 struct choke_sched_data *q = qdisc_priv(sch);
402 struct nlattr *tb[TCA_CHOKE_MAX + 1];
403 const struct tc_red_qopt *ctl;
404 int err;
405 struct sk_buff **old = NULL;
406 unsigned int mask;
a73ed26b 407 u32 max_P;
45e14433 408
409 if (opt == NULL)
410 return -EINVAL;
411
412 err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
413 if (err < 0)
414 return err;
415
416 if (tb[TCA_CHOKE_PARMS] == NULL ||
417 tb[TCA_CHOKE_STAB] == NULL)
418 return -EINVAL;
419
a73ed26b
ED
420 max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
421
45e14433 422 ctl = nla_data(tb[TCA_CHOKE_PARMS]);
423
424 if (ctl->limit > CHOKE_MAX_QUEUE)
425 return -EINVAL;
426
427 mask = roundup_pow_of_two(ctl->limit + 1) - 1;
428 if (mask != q->tab_mask) {
429 struct sk_buff **ntab;
430
8be04b93
JP
431 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
432 GFP_KERNEL | __GFP_NOWARN);
45e14433 433 if (!ntab)
434 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
435 if (!ntab)
436 return -ENOMEM;
437
438 sch_tree_lock(sch);
439 old = q->tab;
440 if (old) {
441 unsigned int oqlen = sch->q.qlen, tail = 0;
442
443 while (q->head != q->tail) {
444 struct sk_buff *skb = q->tab[q->head];
445
446 q->head = (q->head + 1) & q->tab_mask;
447 if (!skb)
448 continue;
449 if (tail < mask) {
450 ntab[tail++] = skb;
451 continue;
452 }
453 sch->qstats.backlog -= qdisc_pkt_len(skb);
454 --sch->q.qlen;
455 qdisc_drop(skb, sch);
456 }
457 qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
458 q->head = 0;
459 q->tail = tail;
460 }
461
462 q->tab_mask = mask;
463 q->tab = ntab;
464 } else
465 sch_tree_lock(sch);
466
467 q->flags = ctl->flags;
468 q->limit = ctl->limit;
469
470 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
471 ctl->Plog, ctl->Scell_log,
a73ed26b
ED
472 nla_data(tb[TCA_CHOKE_STAB]),
473 max_P);
eeca6688 474 red_set_vars(&q->vars);
45e14433 475
476 if (q->head == q->tail)
eeca6688 477 red_end_of_idle_period(&q->vars);
45e14433 478
479 sch_tree_unlock(sch);
480 choke_free(old);
481 return 0;
482}
483
/* Initial setup is identical to a reconfiguration: defer to choke_change(). */
static int choke_init(struct Qdisc *sch, struct nlattr *opt)
{
	const int err = choke_change(sch, opt);

	return err;
}
488
489static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
490{
491 struct choke_sched_data *q = qdisc_priv(sch);
492 struct nlattr *opts = NULL;
493 struct tc_red_qopt opt = {
494 .limit = q->limit,
495 .flags = q->flags,
496 .qth_min = q->parms.qth_min >> q->parms.Wlog,
497 .qth_max = q->parms.qth_max >> q->parms.Wlog,
498 .Wlog = q->parms.Wlog,
499 .Plog = q->parms.Plog,
500 .Scell_log = q->parms.Scell_log,
501 };
502
503 opts = nla_nest_start(skb, TCA_OPTIONS);
504 if (opts == NULL)
505 goto nla_put_failure;
506
1b34ec43
DM
507 if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
508 nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
509 goto nla_put_failure;
45e14433 510 return nla_nest_end(skb, opts);
511
512nla_put_failure:
513 nla_nest_cancel(skb, opts);
514 return -EMSGSIZE;
515}
516
517static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
518{
519 struct choke_sched_data *q = qdisc_priv(sch);
520 struct tc_choke_xstats st = {
521 .early = q->stats.prob_drop + q->stats.forced_drop,
522 .marked = q->stats.prob_mark + q->stats.forced_mark,
523 .pdrop = q->stats.pdrop,
524 .other = q->stats.other,
525 .matched = q->stats.matched,
526 };
527
528 return gnet_stats_copy_app(d, &st, sizeof(st));
529}
530
531static void choke_destroy(struct Qdisc *sch)
532{
533 struct choke_sched_data *q = qdisc_priv(sch);
534
535 tcf_destroy_chain(&q->filter_list);
536 choke_free(q->tab);
537}
538
539static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
540{
541 return NULL;
542}
543
544static unsigned long choke_get(struct Qdisc *sch, u32 classid)
545{
546 return 0;
547}
548
/* Class ->put hook (also used as ->unbind_tcf): intentionally does nothing. */
static void choke_put(struct Qdisc *q, unsigned long cl)
{
}
552
553static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
554 u32 classid)
555{
556 return 0;
557}
558
25d8c0d5
JF
559static struct tcf_proto __rcu **choke_find_tcf(struct Qdisc *sch,
560 unsigned long cl)
45e14433 561{
562 struct choke_sched_data *q = qdisc_priv(sch);
563
564 if (cl)
565 return NULL;
566 return &q->filter_list;
567}
568
569static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
570 struct sk_buff *skb, struct tcmsg *tcm)
571{
572 tcm->tcm_handle |= TC_H_MIN(cl);
573 return 0;
574}
575
576static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
577{
578 if (!arg->stop) {
579 if (arg->fn(sch, 1, arg) < 0) {
580 arg->stop = 1;
581 return;
582 }
583 arg->count++;
584 }
585}
586
587static const struct Qdisc_class_ops choke_class_ops = {
588 .leaf = choke_leaf,
589 .get = choke_get,
590 .put = choke_put,
591 .tcf_chain = choke_find_tcf,
592 .bind_tcf = choke_bind,
593 .unbind_tcf = choke_put,
594 .dump = choke_dump_class,
595 .walk = choke_walk,
596};
597
598static struct sk_buff *choke_peek_head(struct Qdisc *sch)
599{
600 struct choke_sched_data *q = qdisc_priv(sch);
601
602 return (q->head != q->tail) ? q->tab[q->head] : NULL;
603}
604
605static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
606 .id = "choke",
607 .priv_size = sizeof(struct choke_sched_data),
608
609 .enqueue = choke_enqueue,
610 .dequeue = choke_dequeue,
611 .peek = choke_peek_head,
612 .drop = choke_drop,
613 .init = choke_init,
614 .destroy = choke_destroy,
615 .reset = choke_reset,
616 .change = choke_change,
617 .dump = choke_dump,
618 .dump_stats = choke_dump_stats,
619 .owner = THIS_MODULE,
620};
621
622static int __init choke_module_init(void)
623{
624 return register_qdisc(&choke_qdisc_ops);
625}
626
627static void __exit choke_module_exit(void)
628{
629 unregister_qdisc(&choke_qdisc_ops);
630}
631
632module_init(choke_module_init)
633module_exit(choke_module_exit)
634
635MODULE_LICENSE("GPL");