/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *		- Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>
28 | ||
29 | /* Main transmission queue. */ | |
30 | ||
0463d4ae | 31 | /* Modifications to data participating in scheduling must be protected with |
dc2b4847 | 32 | * queue->lock spinlock. |
0463d4ae PM |
33 | * |
34 | * The idea is the following: | |
35 | * - enqueue, dequeue are serialized via top level device | |
dc2b4847 | 36 | * spinlock queue->lock. |
fd44de7c | 37 | * - ingress filtering is serialized via top level device |
555353cf | 38 | * spinlock dev->rx_queue.lock. |
0463d4ae | 39 | * - updates to tree and tree walking are only done under the rtnl mutex. |
1da177e4 | 40 | */ |
1da177e4 LT |
41 | |
42 | void qdisc_lock_tree(struct net_device *dev) | |
dc2b4847 | 43 | __acquires(dev->tx_queue.lock) |
555353cf | 44 | __acquires(dev->rx_queue.lock) |
1da177e4 | 45 | { |
dc2b4847 | 46 | spin_lock_bh(&dev->tx_queue.lock); |
555353cf | 47 | spin_lock(&dev->rx_queue.lock); |
1da177e4 | 48 | } |
62e3ba1b | 49 | EXPORT_SYMBOL(qdisc_lock_tree); |
1da177e4 LT |
50 | |
51 | void qdisc_unlock_tree(struct net_device *dev) | |
555353cf | 52 | __releases(dev->rx_queue.lock) |
dc2b4847 | 53 | __releases(dev->tx_queue.lock) |
1da177e4 | 54 | { |
555353cf | 55 | spin_unlock(&dev->rx_queue.lock); |
dc2b4847 | 56 | spin_unlock_bh(&dev->tx_queue.lock); |
1da177e4 | 57 | } |
62e3ba1b | 58 | EXPORT_SYMBOL(qdisc_unlock_tree); |
1da177e4 | 59 | |
c716a81a JHS |
60 | static inline int qdisc_qlen(struct Qdisc *q) |
61 | { | |
c716a81a JHS |
62 | return q->q.qlen; |
63 | } | |
64 | ||
6c1361a6 KK |
65 | static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, |
66 | struct Qdisc *q) | |
c716a81a | 67 | { |
c716a81a JHS |
68 | if (unlikely(skb->next)) |
69 | dev->gso_skb = skb; | |
70 | else | |
71 | q->ops->requeue(skb, q); | |
6c1361a6 | 72 | |
c716a81a JHS |
73 | netif_schedule(dev); |
74 | return 0; | |
75 | } | |
76 | ||
6c1361a6 KK |
77 | static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, |
78 | struct Qdisc *q) | |
c716a81a | 79 | { |
6c1361a6 | 80 | struct sk_buff *skb; |
c716a81a | 81 | |
6c1361a6 | 82 | if ((skb = dev->gso_skb)) |
c716a81a JHS |
83 | dev->gso_skb = NULL; |
84 | else | |
85 | skb = q->dequeue(q); | |
86 | ||
87 | return skb; | |
88 | } | |
89 | ||
6c1361a6 KK |
90 | static inline int handle_dev_cpu_collision(struct sk_buff *skb, |
91 | struct net_device *dev, | |
92 | struct Qdisc *q) | |
c716a81a | 93 | { |
6c1361a6 | 94 | int ret; |
c716a81a | 95 | |
6c1361a6 KK |
96 | if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { |
97 | /* | |
98 | * Same CPU holding the lock. It may be a transient | |
99 | * configuration error, when hard_start_xmit() recurses. We | |
100 | * detect it by checking xmit owner and drop the packet when | |
101 | * deadloop is detected. Return OK to try the next skb. | |
102 | */ | |
c716a81a | 103 | kfree_skb(skb); |
6c1361a6 KK |
104 | if (net_ratelimit()) |
105 | printk(KERN_WARNING "Dead loop on netdevice %s, " | |
106 | "fix it urgently!\n", dev->name); | |
107 | ret = qdisc_qlen(q); | |
108 | } else { | |
109 | /* | |
110 | * Another cpu is holding lock, requeue & delay xmits for | |
111 | * some time. | |
112 | */ | |
113 | __get_cpu_var(netdev_rx_stat).cpu_collision++; | |
114 | ret = dev_requeue_skb(skb, dev, q); | |
c716a81a JHS |
115 | } |
116 | ||
6c1361a6 | 117 | return ret; |
c716a81a JHS |
118 | } |
119 | ||
/*
 * NOTE: Called under queue->lock with locally disabled BH.
 *
 * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
 * device at a time. queue->lock serializes queue accesses for
 * this device AND txq->qdisc pointer itself.
 *
 * netif_tx_lock serializes accesses to device driver.
 *
 * queue->lock and netif_tx_lock are mutually exclusive,
 * if one is grabbed, another must be free.
 *
 * Note, that this procedure can be called by a watchdog timer
 *
 * Returns to the caller:
 *	0  - queue is empty or throttled.
 *	>0 - queue is not empty.
 *
 */
48d83325 | 139 | static inline int qdisc_restart(struct net_device *dev) |
1da177e4 | 140 | { |
b0e1e646 DM |
141 | struct netdev_queue *txq = &dev->tx_queue; |
142 | struct Qdisc *q = txq->qdisc; | |
1da177e4 | 143 | struct sk_buff *skb; |
5f1a485d | 144 | int ret = NETDEV_TX_BUSY; |
1da177e4 | 145 | |
6c1361a6 KK |
146 | /* Dequeue packet */ |
147 | if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) | |
c716a81a | 148 | return 0; |
f6a78bfc | 149 | |
6c1361a6 KK |
150 | |
151 | /* And release queue */ | |
b0e1e646 | 152 | spin_unlock(&txq->lock); |
c716a81a | 153 | |
8236632f | 154 | HARD_TX_LOCK(dev, smp_processor_id()); |
5f1a485d PWJ |
155 | if (!netif_subqueue_stopped(dev, skb)) |
156 | ret = dev_hard_start_xmit(skb, dev); | |
8236632f | 157 | HARD_TX_UNLOCK(dev); |
c716a81a | 158 | |
b0e1e646 DM |
159 | spin_lock(&txq->lock); |
160 | q = txq->qdisc; | |
c716a81a | 161 | |
6c1361a6 KK |
162 | switch (ret) { |
163 | case NETDEV_TX_OK: | |
164 | /* Driver sent out skb successfully */ | |
165 | ret = qdisc_qlen(q); | |
166 | break; | |
167 | ||
168 | case NETDEV_TX_LOCKED: | |
169 | /* Driver try lock failed */ | |
170 | ret = handle_dev_cpu_collision(skb, dev, q); | |
171 | break; | |
172 | ||
173 | default: | |
174 | /* Driver returned NETDEV_TX_BUSY - requeue skb */ | |
175 | if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) | |
176 | printk(KERN_WARNING "BUG %s code %d qlen %d\n", | |
177 | dev->name, ret, q->q.qlen); | |
178 | ||
179 | ret = dev_requeue_skb(skb, dev, q); | |
180 | break; | |
181 | } | |
c716a81a | 182 | |
6c1361a6 | 183 | return ret; |
1da177e4 LT |
184 | } |
185 | ||
48d83325 HX |
186 | void __qdisc_run(struct net_device *dev) |
187 | { | |
2ba2506c HX |
188 | unsigned long start_time = jiffies; |
189 | ||
190 | while (qdisc_restart(dev)) { | |
191 | if (netif_queue_stopped(dev)) | |
192 | break; | |
193 | ||
194 | /* | |
195 | * Postpone processing if | |
196 | * 1. another process needs the CPU; | |
197 | * 2. we've been doing it for too long. | |
198 | */ | |
199 | if (need_resched() || jiffies != start_time) { | |
200 | netif_schedule(dev); | |
d90df3ad | 201 | break; |
2ba2506c HX |
202 | } |
203 | } | |
48d83325 HX |
204 | |
205 | clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); | |
206 | } | |
207 | ||
1da177e4 LT |
208 | static void dev_watchdog(unsigned long arg) |
209 | { | |
210 | struct net_device *dev = (struct net_device *)arg; | |
b0e1e646 | 211 | struct netdev_queue *txq = &dev->tx_queue; |
1da177e4 | 212 | |
932ff279 | 213 | netif_tx_lock(dev); |
b0e1e646 | 214 | if (txq->qdisc != &noop_qdisc) { |
1da177e4 LT |
215 | if (netif_device_present(dev) && |
216 | netif_running(dev) && | |
217 | netif_carrier_ok(dev)) { | |
218 | if (netif_queue_stopped(dev) && | |
338f7566 SH |
219 | time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) { |
220 | ||
221 | printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", | |
222 | dev->name); | |
1da177e4 | 223 | dev->tx_timeout(dev); |
b4192bbd | 224 | WARN_ON_ONCE(1); |
1da177e4 | 225 | } |
f5a6e01c | 226 | if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) |
1da177e4 LT |
227 | dev_hold(dev); |
228 | } | |
229 | } | |
932ff279 | 230 | netif_tx_unlock(dev); |
1da177e4 LT |
231 | |
232 | dev_put(dev); | |
233 | } | |
234 | ||
1da177e4 LT |
235 | void __netdev_watchdog_up(struct net_device *dev) |
236 | { | |
237 | if (dev->tx_timeout) { | |
238 | if (dev->watchdog_timeo <= 0) | |
239 | dev->watchdog_timeo = 5*HZ; | |
60468d5b VP |
240 | if (!mod_timer(&dev->watchdog_timer, |
241 | round_jiffies(jiffies + dev->watchdog_timeo))) | |
1da177e4 LT |
242 | dev_hold(dev); |
243 | } | |
244 | } | |
245 | ||
/* Local wrapper used by dev_activate() to start the TX watchdog. */
static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}
250 | ||
251 | static void dev_watchdog_down(struct net_device *dev) | |
252 | { | |
932ff279 | 253 | netif_tx_lock_bh(dev); |
1da177e4 | 254 | if (del_timer(&dev->watchdog_timer)) |
15333061 | 255 | dev_put(dev); |
932ff279 | 256 | netif_tx_unlock_bh(dev); |
1da177e4 LT |
257 | } |
258 | ||
bea3348e SH |
259 | /** |
260 | * netif_carrier_on - set carrier | |
261 | * @dev: network device | |
262 | * | |
263 | * Device has detected that carrier. | |
264 | */ | |
0a242efc DV |
265 | void netif_carrier_on(struct net_device *dev) |
266 | { | |
bfaae0f0 | 267 | if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { |
0a242efc | 268 | linkwatch_fire_event(dev); |
bfaae0f0 JG |
269 | if (netif_running(dev)) |
270 | __netdev_watchdog_up(dev); | |
271 | } | |
0a242efc | 272 | } |
62e3ba1b | 273 | EXPORT_SYMBOL(netif_carrier_on); |
0a242efc | 274 | |
bea3348e SH |
275 | /** |
276 | * netif_carrier_off - clear carrier | |
277 | * @dev: network device | |
278 | * | |
279 | * Device has detected loss of carrier. | |
280 | */ | |
0a242efc DV |
281 | void netif_carrier_off(struct net_device *dev) |
282 | { | |
283 | if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) | |
284 | linkwatch_fire_event(dev); | |
285 | } | |
62e3ba1b | 286 | EXPORT_SYMBOL(netif_carrier_off); |
0a242efc | 287 | |
1da177e4 LT |
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */
292 | ||
94df109a | 293 | static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc) |
1da177e4 LT |
294 | { |
295 | kfree_skb(skb); | |
296 | return NET_XMIT_CN; | |
297 | } | |
298 | ||
94df109a | 299 | static struct sk_buff *noop_dequeue(struct Qdisc * qdisc) |
1da177e4 LT |
300 | { |
301 | return NULL; | |
302 | } | |
303 | ||
94df109a | 304 | static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) |
1da177e4 LT |
305 | { |
306 | if (net_ratelimit()) | |
94df109a TG |
307 | printk(KERN_DEBUG "%s deferred output. It is buggy.\n", |
308 | skb->dev->name); | |
1da177e4 LT |
309 | kfree_skb(skb); |
310 | return NET_XMIT_CN; | |
311 | } | |
312 | ||
20fea08b | 313 | struct Qdisc_ops noop_qdisc_ops __read_mostly = { |
1da177e4 LT |
314 | .id = "noop", |
315 | .priv_size = 0, | |
316 | .enqueue = noop_enqueue, | |
317 | .dequeue = noop_dequeue, | |
318 | .requeue = noop_requeue, | |
319 | .owner = THIS_MODULE, | |
320 | }; | |
321 | ||
322 | struct Qdisc noop_qdisc = { | |
323 | .enqueue = noop_enqueue, | |
324 | .dequeue = noop_dequeue, | |
325 | .flags = TCQ_F_BUILTIN, | |
10297b99 | 326 | .ops = &noop_qdisc_ops, |
1da177e4 LT |
327 | .list = LIST_HEAD_INIT(noop_qdisc.list), |
328 | }; | |
62e3ba1b | 329 | EXPORT_SYMBOL(noop_qdisc); |
1da177e4 | 330 | |
20fea08b | 331 | static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { |
1da177e4 LT |
332 | .id = "noqueue", |
333 | .priv_size = 0, | |
334 | .enqueue = noop_enqueue, | |
335 | .dequeue = noop_dequeue, | |
336 | .requeue = noop_requeue, | |
337 | .owner = THIS_MODULE, | |
338 | }; | |
339 | ||
340 | static struct Qdisc noqueue_qdisc = { | |
341 | .enqueue = NULL, | |
342 | .dequeue = noop_dequeue, | |
343 | .flags = TCQ_F_BUILTIN, | |
344 | .ops = &noqueue_qdisc_ops, | |
345 | .list = LIST_HEAD_INIT(noqueue_qdisc.list), | |
346 | }; | |
347 | ||
348 | ||
349 | static const u8 prio2band[TC_PRIO_MAX+1] = | |
350 | { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; | |
351 | ||
352 | /* 3-band FIFO queue: old style, but should be a bit faster than | |
353 | generic prio+fifo combination. | |
354 | */ | |
355 | ||
f87a9c3d TG |
356 | #define PFIFO_FAST_BANDS 3 |
357 | ||
321090e7 TG |
358 | static inline struct sk_buff_head *prio2list(struct sk_buff *skb, |
359 | struct Qdisc *qdisc) | |
1da177e4 LT |
360 | { |
361 | struct sk_buff_head *list = qdisc_priv(qdisc); | |
321090e7 TG |
362 | return list + prio2band[skb->priority & TC_PRIO_MAX]; |
363 | } | |
1da177e4 | 364 | |
f87a9c3d | 365 | static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) |
321090e7 TG |
366 | { |
367 | struct sk_buff_head *list = prio2list(skb, qdisc); | |
1da177e4 | 368 | |
5ce2d488 | 369 | if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { |
1da177e4 | 370 | qdisc->q.qlen++; |
821d24ae | 371 | return __qdisc_enqueue_tail(skb, qdisc, list); |
1da177e4 | 372 | } |
821d24ae TG |
373 | |
374 | return qdisc_drop(skb, qdisc); | |
1da177e4 LT |
375 | } |
376 | ||
f87a9c3d | 377 | static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) |
1da177e4 LT |
378 | { |
379 | int prio; | |
380 | struct sk_buff_head *list = qdisc_priv(qdisc); | |
1da177e4 | 381 | |
452f299d TG |
382 | for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { |
383 | if (!skb_queue_empty(list + prio)) { | |
1da177e4 | 384 | qdisc->q.qlen--; |
452f299d | 385 | return __qdisc_dequeue_head(qdisc, list + prio); |
1da177e4 LT |
386 | } |
387 | } | |
f87a9c3d | 388 | |
1da177e4 LT |
389 | return NULL; |
390 | } | |
391 | ||
f87a9c3d | 392 | static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) |
1da177e4 | 393 | { |
1da177e4 | 394 | qdisc->q.qlen++; |
321090e7 | 395 | return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); |
1da177e4 LT |
396 | } |
397 | ||
f87a9c3d | 398 | static void pfifo_fast_reset(struct Qdisc* qdisc) |
1da177e4 LT |
399 | { |
400 | int prio; | |
401 | struct sk_buff_head *list = qdisc_priv(qdisc); | |
402 | ||
f87a9c3d | 403 | for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) |
821d24ae TG |
404 | __qdisc_reset_queue(qdisc, list + prio); |
405 | ||
406 | qdisc->qstats.backlog = 0; | |
1da177e4 LT |
407 | qdisc->q.qlen = 0; |
408 | } | |
409 | ||
410 | static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) | |
411 | { | |
f87a9c3d | 412 | struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; |
1da177e4 | 413 | |
1da177e4 | 414 | memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); |
1e90474c | 415 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
1da177e4 LT |
416 | return skb->len; |
417 | ||
1e90474c | 418 | nla_put_failure: |
1da177e4 LT |
419 | return -1; |
420 | } | |
421 | ||
1e90474c | 422 | static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) |
1da177e4 | 423 | { |
f87a9c3d | 424 | int prio; |
1da177e4 LT |
425 | struct sk_buff_head *list = qdisc_priv(qdisc); |
426 | ||
f87a9c3d TG |
427 | for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) |
428 | skb_queue_head_init(list + prio); | |
1da177e4 LT |
429 | |
430 | return 0; | |
431 | } | |
432 | ||
20fea08b | 433 | static struct Qdisc_ops pfifo_fast_ops __read_mostly = { |
1da177e4 | 434 | .id = "pfifo_fast", |
f87a9c3d | 435 | .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), |
1da177e4 LT |
436 | .enqueue = pfifo_fast_enqueue, |
437 | .dequeue = pfifo_fast_dequeue, | |
438 | .requeue = pfifo_fast_requeue, | |
439 | .init = pfifo_fast_init, | |
440 | .reset = pfifo_fast_reset, | |
441 | .dump = pfifo_fast_dump, | |
442 | .owner = THIS_MODULE, | |
443 | }; | |
444 | ||
5ce2d488 | 445 | struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, |
bb949fbd | 446 | struct Qdisc_ops *ops) |
1da177e4 LT |
447 | { |
448 | void *p; | |
449 | struct Qdisc *sch; | |
3d54b82f TG |
450 | unsigned int size; |
451 | int err = -ENOBUFS; | |
1da177e4 LT |
452 | |
453 | /* ensure that the Qdisc and the private data are 32-byte aligned */ | |
3d54b82f TG |
454 | size = QDISC_ALIGN(sizeof(*sch)); |
455 | size += ops->priv_size + (QDISC_ALIGNTO - 1); | |
1da177e4 | 456 | |
0da974f4 | 457 | p = kzalloc(size, GFP_KERNEL); |
1da177e4 | 458 | if (!p) |
3d54b82f | 459 | goto errout; |
3d54b82f TG |
460 | sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); |
461 | sch->padded = (char *) sch - (char *) p; | |
1da177e4 LT |
462 | |
463 | INIT_LIST_HEAD(&sch->list); | |
464 | skb_queue_head_init(&sch->q); | |
465 | sch->ops = ops; | |
466 | sch->enqueue = ops->enqueue; | |
467 | sch->dequeue = ops->dequeue; | |
bb949fbd | 468 | sch->dev_queue = dev_queue; |
5ce2d488 | 469 | dev_hold(qdisc_dev(sch)); |
1da177e4 | 470 | atomic_set(&sch->refcnt, 1); |
3d54b82f TG |
471 | |
472 | return sch; | |
473 | errout: | |
01e123d7 | 474 | return ERR_PTR(err); |
3d54b82f TG |
475 | } |
476 | ||
bb949fbd DM |
477 | struct Qdisc * qdisc_create_dflt(struct net_device *dev, |
478 | struct netdev_queue *dev_queue, | |
479 | struct Qdisc_ops *ops, | |
9f9afec4 | 480 | unsigned int parentid) |
3d54b82f TG |
481 | { |
482 | struct Qdisc *sch; | |
10297b99 | 483 | |
5ce2d488 | 484 | sch = qdisc_alloc(dev_queue, ops); |
3d54b82f TG |
485 | if (IS_ERR(sch)) |
486 | goto errout; | |
9f9afec4 | 487 | sch->parent = parentid; |
3d54b82f | 488 | |
1da177e4 LT |
489 | if (!ops->init || ops->init(sch, NULL) == 0) |
490 | return sch; | |
491 | ||
0fbbeb1b | 492 | qdisc_destroy(sch); |
3d54b82f | 493 | errout: |
1da177e4 LT |
494 | return NULL; |
495 | } | |
62e3ba1b | 496 | EXPORT_SYMBOL(qdisc_create_dflt); |
1da177e4 | 497 | |
dc2b4847 | 498 | /* Under queue->lock and BH! */ |
1da177e4 LT |
499 | |
500 | void qdisc_reset(struct Qdisc *qdisc) | |
501 | { | |
20fea08b | 502 | const struct Qdisc_ops *ops = qdisc->ops; |
1da177e4 LT |
503 | |
504 | if (ops->reset) | |
505 | ops->reset(qdisc); | |
506 | } | |
62e3ba1b | 507 | EXPORT_SYMBOL(qdisc_reset); |
1da177e4 | 508 | |
10297b99 | 509 | /* this is the rcu callback function to clean up a qdisc when there |
1da177e4 LT |
510 | * are no further references to it */ |
511 | ||
512 | static void __qdisc_destroy(struct rcu_head *head) | |
513 | { | |
514 | struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); | |
1da177e4 LT |
515 | kfree((char *) qdisc - qdisc->padded); |
516 | } | |
517 | ||
dc2b4847 | 518 | /* Under queue->lock and BH! */ |
1da177e4 LT |
519 | |
520 | void qdisc_destroy(struct Qdisc *qdisc) | |
521 | { | |
20fea08b | 522 | const struct Qdisc_ops *ops = qdisc->ops; |
1da177e4 LT |
523 | |
524 | if (qdisc->flags & TCQ_F_BUILTIN || | |
85670cc1 | 525 | !atomic_dec_and_test(&qdisc->refcnt)) |
1da177e4 LT |
526 | return; |
527 | ||
85670cc1 | 528 | list_del(&qdisc->list); |
85670cc1 | 529 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); |
85670cc1 PM |
530 | if (ops->reset) |
531 | ops->reset(qdisc); | |
532 | if (ops->destroy) | |
533 | ops->destroy(qdisc); | |
1da177e4 | 534 | |
85670cc1 | 535 | module_put(ops->owner); |
5ce2d488 | 536 | dev_put(qdisc_dev(qdisc)); |
1da177e4 LT |
537 | call_rcu(&qdisc->q_rcu, __qdisc_destroy); |
538 | } | |
62e3ba1b | 539 | EXPORT_SYMBOL(qdisc_destroy); |
1da177e4 LT |
540 | |
541 | void dev_activate(struct net_device *dev) | |
542 | { | |
b0e1e646 DM |
543 | struct netdev_queue *txq = &dev->tx_queue; |
544 | ||
1da177e4 LT |
545 | /* No queueing discipline is attached to device; |
546 | create default one i.e. pfifo_fast for devices, | |
547 | which need queueing and noqueue_qdisc for | |
548 | virtual interfaces | |
549 | */ | |
550 | ||
b0e1e646 | 551 | if (txq->qdisc_sleeping == &noop_qdisc) { |
1da177e4 LT |
552 | struct Qdisc *qdisc; |
553 | if (dev->tx_queue_len) { | |
b0e1e646 | 554 | qdisc = qdisc_create_dflt(dev, txq, |
bb949fbd | 555 | &pfifo_fast_ops, |
9f9afec4 | 556 | TC_H_ROOT); |
1da177e4 LT |
557 | if (qdisc == NULL) { |
558 | printk(KERN_INFO "%s: activation failed\n", dev->name); | |
559 | return; | |
560 | } | |
b0e1e646 | 561 | list_add_tail(&qdisc->list, &txq->qdisc_list); |
1da177e4 LT |
562 | } else { |
563 | qdisc = &noqueue_qdisc; | |
564 | } | |
b0e1e646 | 565 | txq->qdisc_sleeping = qdisc; |
1da177e4 LT |
566 | } |
567 | ||
cacaddf5 TC |
568 | if (!netif_carrier_ok(dev)) |
569 | /* Delay activation until next carrier-on event */ | |
570 | return; | |
571 | ||
b0e1e646 DM |
572 | spin_lock_bh(&txq->lock); |
573 | rcu_assign_pointer(txq->qdisc, txq->qdisc_sleeping); | |
574 | if (txq->qdisc != &noqueue_qdisc) { | |
1da177e4 LT |
575 | dev->trans_start = jiffies; |
576 | dev_watchdog_up(dev); | |
577 | } | |
b0e1e646 DM |
578 | spin_unlock_bh(&txq->lock); |
579 | } | |
580 | ||
581 | static void dev_deactivate_queue(struct net_device *dev, | |
582 | struct netdev_queue *dev_queue, | |
583 | struct Qdisc *qdisc_default) | |
584 | { | |
585 | struct Qdisc *qdisc = dev_queue->qdisc; | |
586 | ||
587 | if (qdisc) { | |
588 | dev_queue->qdisc = qdisc_default; | |
589 | qdisc_reset(qdisc); | |
590 | } | |
1da177e4 LT |
591 | } |
592 | ||
593 | void dev_deactivate(struct net_device *dev) | |
594 | { | |
41a23b07 | 595 | struct sk_buff *skb; |
ce0e32e6 | 596 | int running; |
1da177e4 | 597 | |
dc2b4847 | 598 | spin_lock_bh(&dev->tx_queue.lock); |
b0e1e646 | 599 | dev_deactivate_queue(dev, &dev->tx_queue, &noop_qdisc); |
1da177e4 | 600 | |
41a23b07 HX |
601 | skb = dev->gso_skb; |
602 | dev->gso_skb = NULL; | |
dc2b4847 | 603 | spin_unlock_bh(&dev->tx_queue.lock); |
1da177e4 | 604 | |
41a23b07 HX |
605 | kfree_skb(skb); |
606 | ||
1da177e4 LT |
607 | dev_watchdog_down(dev); |
608 | ||
ce0e32e6 | 609 | /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ |
d4828d85 | 610 | synchronize_rcu(); |
1da177e4 | 611 | |
d4828d85 | 612 | /* Wait for outstanding qdisc_run calls. */ |
ce0e32e6 HX |
613 | do { |
614 | while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) | |
615 | yield(); | |
616 | ||
617 | /* | |
618 | * Double-check inside queue lock to ensure that all effects | |
619 | * of the queue run are visible when we return. | |
620 | */ | |
dc2b4847 | 621 | spin_lock_bh(&dev->tx_queue.lock); |
ce0e32e6 | 622 | running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); |
dc2b4847 | 623 | spin_unlock_bh(&dev->tx_queue.lock); |
ce0e32e6 HX |
624 | |
625 | /* | |
626 | * The running flag should never be set at this point because | |
627 | * we've already set dev->qdisc to noop_qdisc *inside* the same | |
628 | * pair of spin locks. That is, if any qdisc_run starts after | |
629 | * our initial test it should see the noop_qdisc and then | |
630 | * clear the RUNNING bit before dropping the queue lock. So | |
631 | * if it is set here then we've found a bug. | |
632 | */ | |
633 | } while (WARN_ON_ONCE(running)); | |
1da177e4 LT |
634 | } |
635 | ||
b0e1e646 DM |
636 | static void dev_init_scheduler_queue(struct net_device *dev, |
637 | struct netdev_queue *dev_queue, | |
638 | struct Qdisc *qdisc) | |
639 | { | |
640 | dev_queue->qdisc = qdisc; | |
641 | dev_queue->qdisc_sleeping = qdisc; | |
642 | INIT_LIST_HEAD(&dev_queue->qdisc_list); | |
643 | } | |
644 | ||
1da177e4 LT |
645 | void dev_init_scheduler(struct net_device *dev) |
646 | { | |
647 | qdisc_lock_tree(dev); | |
b0e1e646 DM |
648 | dev_init_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc); |
649 | dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); | |
1da177e4 LT |
650 | qdisc_unlock_tree(dev); |
651 | ||
b24b8a24 | 652 | setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); |
1da177e4 LT |
653 | } |
654 | ||
b0e1e646 DM |
655 | static void dev_shutdown_scheduler_queue(struct net_device *dev, |
656 | struct netdev_queue *dev_queue, | |
657 | struct Qdisc *qdisc_default) | |
1da177e4 | 658 | { |
b0e1e646 DM |
659 | struct Qdisc *qdisc = dev_queue->qdisc_sleeping; |
660 | ||
661 | if (qdisc) { | |
662 | dev_queue->qdisc = qdisc_default; | |
663 | dev_queue->qdisc_sleeping = qdisc_default; | |
1da177e4 | 664 | |
1da177e4 | 665 | qdisc_destroy(qdisc); |
10297b99 | 666 | } |
b0e1e646 DM |
667 | } |
668 | ||
669 | void dev_shutdown(struct net_device *dev) | |
670 | { | |
671 | qdisc_lock_tree(dev); | |
672 | dev_shutdown_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc); | |
673 | dev_shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); | |
1da177e4 LT |
674 | BUG_TRAP(!timer_pending(&dev->watchdog_timer)); |
675 | qdisc_unlock_tree(dev); | |
676 | } |