]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/sched/sch_sfq.c
net_sched: sfq: fix mem alloc error recovery
[mirror_ubuntu-artful-kernel.git] / net / sched / sch_sfq.c
CommitLineData
1da177e4
LT
1/*
2 * net/sched/sch_sfq.c Stochastic Fairness Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
1da177e4 12#include <linux/module.h>
1da177e4
LT
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/jiffies.h>
16#include <linux/string.h>
1da177e4
LT
17#include <linux/in.h>
18#include <linux/errno.h>
1da177e4 19#include <linux/init.h>
1da177e4 20#include <linux/skbuff.h>
32740ddc 21#include <linux/jhash.h>
5a0e3ad6 22#include <linux/slab.h>
817fb15d 23#include <linux/vmalloc.h>
0ba48053 24#include <net/netlink.h>
1da177e4 25#include <net/pkt_sched.h>
11fca931 26#include <net/flow_keys.h>
1da177e4
LT
27
28
29/* Stochastic Fairness Queuing algorithm.
30 =======================================
31
32 Source:
33 Paul E. McKenney "Stochastic Fairness Queuing",
34 IEEE INFOCOM'90 Proceedings, San Francisco, 1990.
35
36 Paul E. McKenney "Stochastic Fairness Queuing",
37 "Interworking: Research and Experience", v.2, 1991, p.113-131.
38
39
40 See also:
41 M. Shreedhar and George Varghese "Efficient Fair
42 Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
43
44
10297b99 45 This is not the thing that is usually called (W)FQ nowadays.
1da177e4
LT
46 It does not use any timestamp mechanism, but instead
47 processes queues in round-robin order.
48
49 ADVANTAGE:
50
51 - It is very cheap. Both CPU and memory requirements are minimal.
52
53 DRAWBACKS:
54
10297b99 55 - "Stochastic" -> It is not 100% fair.
1da177e4
LT
56 When hash collisions occur, several flows are considered as one.
57
58 - "Round-robin" -> It introduces larger delays than virtual clock
59 based schemes, and should not be used for isolating interactive
60 traffic from non-interactive. It means, that this scheduler
61 should be used as leaf of CBQ or P3, which put interactive traffic
62 to higher priority band.
63
64 We still need true WFQ for top level CSZ, but using WFQ
65 for the best effort traffic is absolutely pointless:
66 SFQ is superior for this purpose.
67
68 IMPLEMENTATION:
69 This implementation limits maximal queue length to 128;
eeaeb068 70 max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
1da177e4 71 The only goal of these restrictions was that all data
eda83e3b 72 fit into one 4K page on 32bit arches.
1da177e4
LT
73
74 It is easy to increase these values, but not in flight. */
75
eda83e3b
ED
/* Compile-time sizing of the scheduler. */
#define SFQ_DEPTH			128	/* max number of packets per flow */
#define SFQ_SLOTS			128	/* max number of flows */
#define SFQ_EMPTY_SLOT			255	/* ht[] marker: bucket has no slot */
#define SFQ_DEFAULT_HASH_DIVISOR	1024	/* hash buckets unless overridden */

/*
 * The per-slot allot is kept in 16 bits, yet packets of up to 64K must be
 * handled. Byte counts are therefore scaled down by 8 (1 << 3) before they
 * are stored, so that no overflow occurs.
 */
#define SFQ_ALLOT_SHIFT		3
#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
1da177e4 86
/*
 * Compact index type used instead of pointers.
 * It has to be able to hold at least SFQ_DEPTH + SFQ_SLOTS distinct values.
 */
typedef unsigned char sfq_index;
89
eda83e3b
ED
90/*
91 * We dont use pointers to save space.
92 * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
93 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
94 * are 'pointers' to dep[] array
95 */
cc7ec456 96struct sfq_head {
1da177e4
LT
97 sfq_index next;
98 sfq_index prev;
99};
100
eda83e3b
ED
101struct sfq_slot {
102 struct sk_buff *skblist_next;
103 struct sk_buff *skblist_prev;
104 sfq_index qlen; /* number of skbs in skblist */
105 sfq_index next; /* next slot in sfq chain */
106 struct sfq_head dep; /* anchor in dep[] chains */
107 unsigned short hash; /* hash value (index in ht[]) */
108 short allot; /* credit for this slot */
109};
110
cc7ec456 111struct sfq_sched_data {
1da177e4
LT
112/* Parameters */
113 int perturb_period;
cc7ec456 114 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
1da177e4 115 int limit;
817fb15d 116 unsigned int divisor; /* number of slots in hash table */
1da177e4 117/* Variables */
7d2681a6 118 struct tcf_proto *filter_list;
1da177e4 119 struct timer_list perturb_timer;
32740ddc 120 u32 perturbation;
eda83e3b 121 sfq_index cur_depth; /* depth of longest slot */
eeaeb068 122 unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
eda83e3b 123 struct sfq_slot *tail; /* current slot in round */
817fb15d 124 sfq_index *ht; /* Hash table (divisor slots) */
eda83e3b
ED
125 struct sfq_slot slots[SFQ_SLOTS];
126 struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
1da177e4
LT
127};
128
eda83e3b
ED
129/*
130 * sfq_head are either in a sfq_slot or in dep[] array
131 */
132static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
133{
134 if (val < SFQ_SLOTS)
135 return &q->slots[val].dep;
136 return &q->dep[val - SFQ_SLOTS];
137}
138
225d9b89
ED
139/*
140 * In order to be able to quickly rehash our queue when timer changes
141 * q->perturbation, we store flow_keys in skb->cb[]
142 */
143struct sfq_skb_cb {
144 struct flow_keys keys;
145};
146
147static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
148{
149 BUILD_BUG_ON(sizeof(skb->cb) <
150 sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
151 return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
152}
153
11fca931
ED
154static unsigned int sfq_hash(const struct sfq_sched_data *q,
155 const struct sk_buff *skb)
1da177e4 156{
225d9b89 157 const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
11fca931
ED
158 unsigned int hash;
159
225d9b89
ED
160 hash = jhash_3words((__force u32)keys->dst,
161 (__force u32)keys->src ^ keys->ip_proto,
162 (__force u32)keys->ports, q->perturbation);
11fca931 163 return hash & (q->divisor - 1);
1da177e4
LT
164}
165
7d2681a6
PM
166static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
167 int *qerr)
168{
169 struct sfq_sched_data *q = qdisc_priv(sch);
170 struct tcf_result res;
171 int result;
172
173 if (TC_H_MAJ(skb->priority) == sch->handle &&
174 TC_H_MIN(skb->priority) > 0 &&
817fb15d 175 TC_H_MIN(skb->priority) <= q->divisor)
7d2681a6
PM
176 return TC_H_MIN(skb->priority);
177
225d9b89
ED
178 if (!q->filter_list) {
179 skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
7d2681a6 180 return sfq_hash(q, skb) + 1;
225d9b89 181 }
7d2681a6 182
c27f339a 183 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
7d2681a6
PM
184 result = tc_classify(skb, q->filter_list, &res);
185 if (result >= 0) {
186#ifdef CONFIG_NET_CLS_ACT
187 switch (result) {
188 case TC_ACT_STOLEN:
189 case TC_ACT_QUEUED:
378a2f09 190 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
7d2681a6
PM
191 case TC_ACT_SHOT:
192 return 0;
193 }
194#endif
817fb15d 195 if (TC_H_MIN(res.classid) <= q->divisor)
7d2681a6
PM
196 return TC_H_MIN(res.classid);
197 }
198 return 0;
199}
200
eda83e3b
ED
201/*
202 * x : slot number [0 .. SFQ_SLOTS - 1]
203 */
1da177e4
LT
204static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
205{
206 sfq_index p, n;
eda83e3b
ED
207 int qlen = q->slots[x].qlen;
208
209 p = qlen + SFQ_SLOTS;
210 n = q->dep[qlen].next;
1da177e4 211
eda83e3b
ED
212 q->slots[x].dep.next = n;
213 q->slots[x].dep.prev = p;
214
215 q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
216 sfq_dep_head(q, n)->prev = x;
1da177e4
LT
217}
218
eda83e3b
ED
/*
 * Remove slot @x from the dep[] chain it is currently linked in.
 * @n and @p are lvalues supplied by the caller; on exit they hold the
 * indexes of the former next and previous neighbours of @x.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and stays correct under an unbraced if/else; arguments are parenthesized
 * so that arbitrary expressions can be passed safely.
 */
#define sfq_unlink(q, x, n, p)				\
	do {						\
		(n) = (q)->slots[(x)].dep.next;		\
		(p) = (q)->slots[(x)].dep.prev;		\
		sfq_dep_head((q), (p))->next = (n);	\
		sfq_dep_head((q), (n))->prev = (p);	\
	} while (0)
224
225
1da177e4
LT
226static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
227{
228 sfq_index p, n;
eda83e3b 229 int d;
1da177e4 230
eda83e3b 231 sfq_unlink(q, x, n, p);
1da177e4 232
eda83e3b
ED
233 d = q->slots[x].qlen--;
234 if (n == p && q->cur_depth == d)
235 q->cur_depth--;
1da177e4
LT
236 sfq_link(q, x);
237}
238
239static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
240{
241 sfq_index p, n;
242 int d;
243
eda83e3b 244 sfq_unlink(q, x, n, p);
1da177e4 245
eda83e3b
ED
246 d = ++q->slots[x].qlen;
247 if (q->cur_depth < d)
248 q->cur_depth = d;
1da177e4
LT
249 sfq_link(q, x);
250}
251
eda83e3b
ED
252/* helper functions : might be changed when/if skb use a standard list_head */
253
254/* remove one skb from tail of slot queue */
255static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
256{
257 struct sk_buff *skb = slot->skblist_prev;
258
259 slot->skblist_prev = skb->prev;
ee09b3c1 260 skb->prev->next = (struct sk_buff *)slot;
eda83e3b
ED
261 skb->next = skb->prev = NULL;
262 return skb;
263}
264
265/* remove one skb from head of slot queue */
266static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
267{
268 struct sk_buff *skb = slot->skblist_next;
269
270 slot->skblist_next = skb->next;
18c8d82a 271 skb->next->prev = (struct sk_buff *)slot;
eda83e3b
ED
272 skb->next = skb->prev = NULL;
273 return skb;
274}
275
276static inline void slot_queue_init(struct sfq_slot *slot)
277{
278 slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
279}
280
281/* add skb to slot queue (tail add) */
282static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
283{
284 skb->prev = slot->skblist_prev;
285 skb->next = (struct sk_buff *)slot;
286 slot->skblist_prev->next = skb;
287 slot->skblist_prev = skb;
288}
289
/*
 * Iterate @skb over every packet queued in @slot, head to tail.
 * The walk ends when the cursor comes back to the slot (the list anchor).
 */
#define slot_queue_walk(slot, skb)		\
	for (skb = slot->skblist_next;		\
	     skb != (struct sk_buff *)slot;	\
	     skb = skb->next)
294
1da177e4
LT
295static unsigned int sfq_drop(struct Qdisc *sch)
296{
297 struct sfq_sched_data *q = qdisc_priv(sch);
eda83e3b 298 sfq_index x, d = q->cur_depth;
1da177e4
LT
299 struct sk_buff *skb;
300 unsigned int len;
eda83e3b 301 struct sfq_slot *slot;
1da177e4 302
eda83e3b 303 /* Queue is full! Find the longest slot and drop tail packet from it */
1da177e4 304 if (d > 1) {
eda83e3b
ED
305 x = q->dep[d].next;
306 slot = &q->slots[x];
307drop:
308 skb = slot_dequeue_tail(slot);
0abf77e5 309 len = qdisc_pkt_len(skb);
1da177e4 310 sfq_dec(q, x);
eda83e3b 311 kfree_skb(skb);
1da177e4
LT
312 sch->q.qlen--;
313 sch->qstats.drops++;
f5539eb8 314 sch->qstats.backlog -= len;
1da177e4
LT
315 return len;
316 }
317
318 if (d == 1) {
319 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
eda83e3b
ED
320 x = q->tail->next;
321 slot = &q->slots[x];
322 q->tail->next = slot->next;
323 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
324 goto drop;
1da177e4
LT
325 }
326
327 return 0;
328}
329
330static int
6f9e98f7 331sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1da177e4
LT
332{
333 struct sfq_sched_data *q = qdisc_priv(sch);
7d2681a6 334 unsigned int hash;
8efa8854 335 sfq_index x, qlen;
eda83e3b 336 struct sfq_slot *slot;
7f3ff4f6 337 int uninitialized_var(ret);
7d2681a6
PM
338
339 hash = sfq_classify(skb, sch, &ret);
340 if (hash == 0) {
c27f339a 341 if (ret & __NET_XMIT_BYPASS)
7d2681a6
PM
342 sch->qstats.drops++;
343 kfree_skb(skb);
344 return ret;
345 }
346 hash--;
1da177e4
LT
347
348 x = q->ht[hash];
eda83e3b
ED
349 slot = &q->slots[x];
350 if (x == SFQ_EMPTY_SLOT) {
351 x = q->dep[0].next; /* get a free slot */
352 q->ht[hash] = x;
353 slot = &q->slots[x];
354 slot->hash = hash;
1da177e4 355 }
6f9e98f7 356
eda83e3b 357 /* If selected queue has length q->limit, do simple tail drop,
32740ddc
AK
358 * i.e. drop _this_ packet.
359 */
eda83e3b 360 if (slot->qlen >= q->limit)
32740ddc
AK
361 return qdisc_drop(skb, sch);
362
0abf77e5 363 sch->qstats.backlog += qdisc_pkt_len(skb);
eda83e3b 364 slot_queue_add(slot, skb);
1da177e4 365 sfq_inc(q, x);
eda83e3b
ED
366 if (slot->qlen == 1) { /* The flow is new */
367 if (q->tail == NULL) { /* It is the first flow */
368 slot->next = x;
d47a0ac7 369 q->tail = slot;
1da177e4 370 } else {
eda83e3b
ED
371 slot->next = q->tail->next;
372 q->tail->next = x;
1da177e4 373 }
eeaeb068 374 slot->allot = q->scaled_quantum;
1da177e4 375 }
9190b3b3 376 if (++sch->q.qlen <= q->limit)
9871e50e 377 return NET_XMIT_SUCCESS;
1da177e4 378
8efa8854 379 qlen = slot->qlen;
1da177e4 380 sfq_drop(sch);
8efa8854
ED
381 /* Return Congestion Notification only if we dropped a packet
382 * from this flow.
383 */
e1738bd9
ED
384 if (qlen != slot->qlen)
385 return NET_XMIT_CN;
386
387 /* As we dropped a packet, better let upper stack know this */
388 qdisc_tree_decrease_qlen(sch, 1);
389 return NET_XMIT_SUCCESS;
1da177e4
LT
390}
391
1da177e4 392static struct sk_buff *
6f9e98f7 393sfq_dequeue(struct Qdisc *sch)
1da177e4
LT
394{
395 struct sfq_sched_data *q = qdisc_priv(sch);
396 struct sk_buff *skb;
aa3e2199 397 sfq_index a, next_a;
eda83e3b 398 struct sfq_slot *slot;
1da177e4
LT
399
400 /* No active slots */
eda83e3b 401 if (q->tail == NULL)
1da177e4
LT
402 return NULL;
403
eeaeb068 404next_slot:
eda83e3b
ED
405 a = q->tail->next;
406 slot = &q->slots[a];
eeaeb068
ED
407 if (slot->allot <= 0) {
408 q->tail = slot;
409 slot->allot += q->scaled_quantum;
410 goto next_slot;
411 }
eda83e3b 412 skb = slot_dequeue_head(slot);
1da177e4 413 sfq_dec(q, a);
9190b3b3 414 qdisc_bstats_update(sch, skb);
1da177e4 415 sch->q.qlen--;
0abf77e5 416 sch->qstats.backlog -= qdisc_pkt_len(skb);
1da177e4
LT
417
418 /* Is the slot empty? */
eda83e3b
ED
419 if (slot->qlen == 0) {
420 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
421 next_a = slot->next;
aa3e2199 422 if (a == next_a) {
eda83e3b 423 q->tail = NULL; /* no more active slots */
1da177e4
LT
424 return skb;
425 }
eda83e3b 426 q->tail->next = next_a;
eeaeb068
ED
427 } else {
428 slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
1da177e4
LT
429 }
430 return skb;
431}
432
433static void
6f9e98f7 434sfq_reset(struct Qdisc *sch)
1da177e4
LT
435{
436 struct sk_buff *skb;
437
438 while ((skb = sfq_dequeue(sch)) != NULL)
439 kfree_skb(skb);
440}
441
225d9b89
ED
442/*
443 * When q->perturbation is changed, we rehash all queued skbs
444 * to avoid OOO (Out Of Order) effects.
445 * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
446 * counters.
447 */
448static void sfq_rehash(struct sfq_sched_data *q)
449{
450 struct sk_buff *skb;
451 int i;
452 struct sfq_slot *slot;
453 struct sk_buff_head list;
454
455 __skb_queue_head_init(&list);
456
457 for (i = 0; i < SFQ_SLOTS; i++) {
458 slot = &q->slots[i];
459 if (!slot->qlen)
460 continue;
461 while (slot->qlen) {
462 skb = slot_dequeue_head(slot);
463 sfq_dec(q, i);
464 __skb_queue_tail(&list, skb);
465 }
466 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
467 }
468 q->tail = NULL;
469
470 while ((skb = __skb_dequeue(&list)) != NULL) {
471 unsigned int hash = sfq_hash(q, skb);
472 sfq_index x = q->ht[hash];
473
474 slot = &q->slots[x];
475 if (x == SFQ_EMPTY_SLOT) {
476 x = q->dep[0].next; /* get a free slot */
477 q->ht[hash] = x;
478 slot = &q->slots[x];
479 slot->hash = hash;
480 }
481 slot_queue_add(slot, skb);
482 sfq_inc(q, x);
483 if (slot->qlen == 1) { /* The flow is new */
484 if (q->tail == NULL) { /* It is the first flow */
485 slot->next = x;
486 } else {
487 slot->next = q->tail->next;
488 q->tail->next = x;
489 }
490 q->tail = slot;
491 slot->allot = q->scaled_quantum;
492 }
493 }
494}
495
1da177e4
LT
496static void sfq_perturbation(unsigned long arg)
497{
6f9e98f7 498 struct Qdisc *sch = (struct Qdisc *)arg;
1da177e4 499 struct sfq_sched_data *q = qdisc_priv(sch);
225d9b89 500 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
1da177e4 501
225d9b89 502 spin_lock(root_lock);
d46f8dd8 503 q->perturbation = net_random();
225d9b89
ED
504 if (!q->filter_list && q->tail)
505 sfq_rehash(q);
506 spin_unlock(root_lock);
1da177e4 507
32740ddc
AK
508 if (q->perturb_period)
509 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
1da177e4
LT
510}
511
1e90474c 512static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
1da177e4
LT
513{
514 struct sfq_sched_data *q = qdisc_priv(sch);
1e90474c 515 struct tc_sfq_qopt *ctl = nla_data(opt);
5e50da01 516 unsigned int qlen;
1da177e4 517
1e90474c 518 if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
1da177e4
LT
519 return -EINVAL;
520
119b3d38 521 if (ctl->divisor &&
522 (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
523 return -EINVAL;
524
1da177e4 525 sch_tree_lock(sch);
5ce2d488 526 q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
eeaeb068 527 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
6f9e98f7 528 q->perturb_period = ctl->perturb_period * HZ;
1da177e4 529 if (ctl->limit)
32740ddc 530 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
119b3d38 531 if (ctl->divisor)
817fb15d 532 q->divisor = ctl->divisor;
5e50da01 533 qlen = sch->q.qlen;
5588b40d 534 while (sch->q.qlen > q->limit)
1da177e4 535 sfq_drop(sch);
5e50da01 536 qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
1da177e4
LT
537
538 del_timer(&q->perturb_timer);
539 if (q->perturb_period) {
32740ddc 540 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
d46f8dd8 541 q->perturbation = net_random();
1da177e4
LT
542 }
543 sch_tree_unlock(sch);
544 return 0;
545}
546
bd16a6cc
ED
547static void *sfq_alloc(size_t sz)
548{
549 void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
550
551 if (!ptr)
552 ptr = vmalloc(sz);
553 return ptr;
554}
555
/*
 * Release memory obtained from sfq_alloc(), using vfree() or kfree()
 * depending on where the address lives. A NULL @addr is ignored.
 */
static void sfq_free(void *addr)
{
	if (!addr)
		return;

	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}
565
566static void sfq_destroy(struct Qdisc *sch)
567{
568 struct sfq_sched_data *q = qdisc_priv(sch);
569
570 tcf_destroy_chain(&q->filter_list);
571 q->perturb_period = 0;
572 del_timer_sync(&q->perturb_timer);
573 sfq_free(q->ht);
574}
575
1e90474c 576static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
1da177e4
LT
577{
578 struct sfq_sched_data *q = qdisc_priv(sch);
579 int i;
580
d3e99483 581 q->perturb_timer.function = sfq_perturbation;
c19a28e1 582 q->perturb_timer.data = (unsigned long)sch;
d3e99483 583 init_timer_deferrable(&q->perturb_timer);
1da177e4 584
6f9e98f7 585 for (i = 0; i < SFQ_DEPTH; i++) {
eda83e3b
ED
586 q->dep[i].next = i + SFQ_SLOTS;
587 q->dep[i].prev = i + SFQ_SLOTS;
1da177e4 588 }
6f9e98f7 589
32740ddc 590 q->limit = SFQ_DEPTH - 1;
eda83e3b
ED
591 q->cur_depth = 0;
592 q->tail = NULL;
817fb15d 593 q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
1da177e4 594 if (opt == NULL) {
5ce2d488 595 q->quantum = psched_mtu(qdisc_dev(sch));
eeaeb068 596 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
1da177e4 597 q->perturb_period = 0;
d46f8dd8 598 q->perturbation = net_random();
1da177e4
LT
599 } else {
600 int err = sfq_change(sch, opt);
601 if (err)
602 return err;
603 }
6f9e98f7 604
bd16a6cc
ED
605 q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
606 if (!q->ht) {
607 sfq_destroy(sch);
817fb15d 608 return -ENOMEM;
bd16a6cc 609 }
817fb15d
ED
610 for (i = 0; i < q->divisor; i++)
611 q->ht[i] = SFQ_EMPTY_SLOT;
612
18c8d82a
ED
613 for (i = 0; i < SFQ_SLOTS; i++) {
614 slot_queue_init(&q->slots[i]);
1da177e4 615 sfq_link(q, i);
18c8d82a 616 }
23624935
ED
617 if (q->limit >= 1)
618 sch->flags |= TCQ_F_CAN_BYPASS;
619 else
620 sch->flags &= ~TCQ_F_CAN_BYPASS;
1da177e4
LT
621 return 0;
622}
623
1da177e4
LT
624static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
625{
626 struct sfq_sched_data *q = qdisc_priv(sch);
27a884dc 627 unsigned char *b = skb_tail_pointer(skb);
1da177e4
LT
628 struct tc_sfq_qopt opt;
629
630 opt.quantum = q->quantum;
6f9e98f7 631 opt.perturb_period = q->perturb_period / HZ;
1da177e4
LT
632
633 opt.limit = q->limit;
817fb15d 634 opt.divisor = q->divisor;
cdec7e50 635 opt.flows = q->limit;
1da177e4 636
1e90474c 637 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1da177e4
LT
638
639 return skb->len;
640
1e90474c 641nla_put_failure:
dc5fc579 642 nlmsg_trim(skb, b);
1da177e4
LT
643 return -1;
644}
645
41065fba
JP
646static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
647{
648 return NULL;
649}
650
7d2681a6
PM
651static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
652{
653 return 0;
654}
655
eb4a5527
JP
656static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
657 u32 classid)
658{
23624935
ED
659 /* we cannot bypass queue discipline anymore */
660 sch->flags &= ~TCQ_F_CAN_BYPASS;
eb4a5527
JP
661 return 0;
662}
663
da7115d9
JP
/* Class ops .put and .unbind_tcf: nothing to release, intentionally empty. */
static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}
667
7d2681a6
PM
668static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
669{
670 struct sfq_sched_data *q = qdisc_priv(sch);
671
672 if (cl)
673 return NULL;
674 return &q->filter_list;
675}
676
94de78d1
PM
677static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
678 struct sk_buff *skb, struct tcmsg *tcm)
679{
680 tcm->tcm_handle |= TC_H_MIN(cl);
681 return 0;
682}
683
684static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
685 struct gnet_dump *d)
686{
687 struct sfq_sched_data *q = qdisc_priv(sch);
ee09b3c1
ED
688 sfq_index idx = q->ht[cl - 1];
689 struct gnet_stats_queue qs = { 0 };
690 struct tc_sfq_xstats xstats = { 0 };
c4266263
ED
691 struct sk_buff *skb;
692
ee09b3c1
ED
693 if (idx != SFQ_EMPTY_SLOT) {
694 const struct sfq_slot *slot = &q->slots[idx];
94de78d1 695
eeaeb068 696 xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
ee09b3c1
ED
697 qs.qlen = slot->qlen;
698 slot_queue_walk(slot, skb)
699 qs.backlog += qdisc_pkt_len(skb);
700 }
94de78d1
PM
701 if (gnet_stats_copy_queue(d, &qs) < 0)
702 return -1;
703 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
704}
705
7d2681a6
PM
706static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
707{
94de78d1
PM
708 struct sfq_sched_data *q = qdisc_priv(sch);
709 unsigned int i;
710
711 if (arg->stop)
712 return;
713
817fb15d 714 for (i = 0; i < q->divisor; i++) {
eda83e3b 715 if (q->ht[i] == SFQ_EMPTY_SLOT ||
94de78d1
PM
716 arg->count < arg->skip) {
717 arg->count++;
718 continue;
719 }
720 if (arg->fn(sch, i + 1, arg) < 0) {
721 arg->stop = 1;
722 break;
723 }
724 arg->count++;
725 }
7d2681a6
PM
726}
727
728static const struct Qdisc_class_ops sfq_class_ops = {
41065fba 729 .leaf = sfq_leaf,
7d2681a6 730 .get = sfq_get,
da7115d9 731 .put = sfq_put,
7d2681a6 732 .tcf_chain = sfq_find_tcf,
eb4a5527 733 .bind_tcf = sfq_bind,
da7115d9 734 .unbind_tcf = sfq_put,
94de78d1
PM
735 .dump = sfq_dump_class,
736 .dump_stats = sfq_dump_class_stats,
7d2681a6
PM
737 .walk = sfq_walk,
738};
739
20fea08b 740static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
7d2681a6 741 .cl_ops = &sfq_class_ops,
1da177e4
LT
742 .id = "sfq",
743 .priv_size = sizeof(struct sfq_sched_data),
744 .enqueue = sfq_enqueue,
745 .dequeue = sfq_dequeue,
07bd8df5 746 .peek = qdisc_peek_dequeued,
1da177e4
LT
747 .drop = sfq_drop,
748 .init = sfq_init,
749 .reset = sfq_reset,
750 .destroy = sfq_destroy,
751 .change = NULL,
752 .dump = sfq_dump,
753 .owner = THIS_MODULE,
754};
755
756static int __init sfq_module_init(void)
757{
758 return register_qdisc(&sfq_qdisc_ops);
759}
10297b99 760static void __exit sfq_module_exit(void)
1da177e4
LT
761{
762 unregister_qdisc(&sfq_qdisc_ops);
763}
764module_init(sfq_module_init)
765module_exit(sfq_module_exit)
766MODULE_LICENSE("GPL");