/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2008 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include "net_driver.h"
#include "tx.h"
#include "efx.h"
#include "falcon.h"
#include "workarounds.h"

/*
 * TX descriptor ring full threshold
 *
 * The tx_queue descriptor ring fill-level must fall below this value
 * before we restart the netif queue
 */
#define EFX_NETDEV_TX_THRESHOLD(_tx_queue)	\
	(_tx_queue->efx->type->txd_ring_mask / 2u)
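/* For example, with a 4096-entry descriptor ring (txd_ring_mask of
 * 0xfff, assumed here purely for illustration; the real value comes
 * from the NIC type), the netif queue is restarted once fewer than
 * 0xfff / 2 = 2047 descriptors are in flight.
 */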

/* We want to be able to nest calls to netif_stop_queue(), since each
 * channel can have an individual stop on the queue.
 */
void efx_stop_queue(struct efx_nic *efx)
{
	spin_lock_bh(&efx->netif_stop_lock);
	EFX_TRACE(efx, "stop TX queue\n");

	atomic_inc(&efx->netif_stop_count);
	netif_stop_queue(efx->net_dev);

	spin_unlock_bh(&efx->netif_stop_lock);
}

/* Wake netif's TX queue
 * We want to be able to nest calls to netif_stop_queue(), since each
 * channel can have an individual stop on the queue.
 */
inline void efx_wake_queue(struct efx_nic *efx)
{
	local_bh_disable();
	if (atomic_dec_and_lock(&efx->netif_stop_count,
				&efx->netif_stop_lock)) {
		EFX_TRACE(efx, "waking TX queue\n");
		netif_wake_queue(efx->net_dev);
		spin_unlock(&efx->netif_stop_lock);
	}
	local_bh_enable();
}
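/* Illustration of the nesting behaviour: if two channels each call
 * efx_stop_queue(), netif_stop_count rises to 2 and the queue stays
 * stopped until both have called efx_wake_queue().  Since
 * atomic_dec_and_lock() only takes netif_stop_lock on the decrement
 * that reaches zero, only the final waker touches the netdev queue.
 */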

static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
				      struct efx_tx_buffer *buffer)
{
	if (buffer->unmap_len) {
		struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
		if (buffer->unmap_single)
			pci_unmap_single(pci_dev, buffer->unmap_addr,
					 buffer->unmap_len, PCI_DMA_TODEVICE);
		else
			pci_unmap_page(pci_dev, buffer->unmap_addr,
				       buffer->unmap_len, PCI_DMA_TODEVICE);
		buffer->unmap_len = 0;
		buffer->unmap_single = 0;
	}

	if (buffer->skb) {
		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
		buffer->skb = NULL;
		EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
			  "complete\n", tx_queue->queue,
			  (unsigned int)(buffer - tx_queue->buffer));
	}
}
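/* Ownership convention, as established by efx_enqueue_skb() below:
 * unmap_len is non-zero only on the final descriptor of each
 * DMA-mapped fragment, and skb is set only on the final descriptor of
 * the whole packet, so each resource is released exactly once as
 * descriptors complete.
 */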

/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue.  The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped
 * and the queue's insert pointer will be restored to its original
 * value.
 *
 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
 * You must hold netif_tx_lock() to call this function.
 */
static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
				  const struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	struct pci_dev *pci_dev = efx->pci_dev;
	struct efx_tx_buffer *buffer;
	skb_frag_t *fragment;
	struct page *page;
	int page_offset;
	unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
	dma_addr_t dma_addr, unmap_addr = 0;
	unsigned int dma_len;
	unsigned unmap_single;
	int q_space, i = 0;
	int rc = NETDEV_TX_OK;

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	/* Get size of the initial fragment */
	len = skb_headlen(skb);

	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
	q_space = efx->type->txd_ring_mask - 1 - fill_level;

	/* Map for DMA.  Use pci_map_single rather than pci_map_page
	 * since this is more efficient on machines with sparse
	 * memory.
	 */
	unmap_single = 1;
	dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);

	/* Process all fragments */
	while (1) {
		if (unlikely(pci_dma_mapping_error(dma_addr)))
			goto pci_err;

		/* Store fields for marking in the per-fragment final
		 * descriptor */
		unmap_len = len;
		unmap_addr = dma_addr;

		/* Add to TX queue, splitting across DMA boundaries */
		do {
			if (unlikely(q_space-- <= 0)) {
				/* It might be that completions have
				 * happened since the xmit path last
				 * checked.  Update the xmit path's
				 * copy of read_count.
				 */
				++tx_queue->stopped;
				/* This memory barrier protects the
				 * change of stopped from the access
				 * of read_count. */
				smp_mb();
				tx_queue->old_read_count =
					*(volatile unsigned *)
					&tx_queue->read_count;
				fill_level = (tx_queue->insert_count
					      - tx_queue->old_read_count);
				q_space = (efx->type->txd_ring_mask - 1 -
					   fill_level);
				if (unlikely(q_space-- <= 0))
					goto stop;
				smp_mb();
				--tx_queue->stopped;
			}
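			/* The smp_mb() calls above pair with the one
			 * in efx_xmit_done(): the completion path
			 * updates read_count before testing stopped,
			 * while this path sets stopped before
			 * re-reading read_count, so at least one side
			 * observes the other's update and a wakeup
			 * cannot be lost.
			 */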

			insert_ptr = (tx_queue->insert_count &
				      efx->type->txd_ring_mask);
			buffer = &tx_queue->buffer[insert_ptr];
			EFX_BUG_ON_PARANOID(buffer->skb);
			EFX_BUG_ON_PARANOID(buffer->len);
			EFX_BUG_ON_PARANOID(buffer->continuation != 1);
			EFX_BUG_ON_PARANOID(buffer->unmap_len);

			dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
			if (likely(dma_len > len))
				dma_len = len;

			misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
			if (misalign && dma_len + misalign > 512)
				dma_len = 512 - misalign;
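			/* ((~dma_addr) & tx_dma_mask) + 1 is the number
			 * of bytes from dma_addr up to the next DMA
			 * boundary.  E.g. if tx_dma_mask were 0xfff (a
			 * 4KB limit, assumed here only for
			 * illustration) and dma_addr ended in 0xf80,
			 * dma_len would start as 0x80.  The bug5391
			 * workaround then further caps misaligned
			 * descriptors at 512 bytes.
			 */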

			/* Fill out per descriptor fields */
			buffer->len = dma_len;
			buffer->dma_addr = dma_addr;
			len -= dma_len;
			dma_addr += dma_len;
			++tx_queue->insert_count;
		} while (len);

		/* Transfer ownership of the unmapping to the final buffer */
		buffer->unmap_addr = unmap_addr;
		buffer->unmap_single = unmap_single;
		buffer->unmap_len = unmap_len;
		unmap_len = 0;

		/* Get address and size of next fragment */
		if (i >= skb_shinfo(skb)->nr_frags)
			break;
		fragment = &skb_shinfo(skb)->frags[i];
		len = fragment->size;
		page = fragment->page;
		page_offset = fragment->page_offset;
		i++;
		/* Map for DMA */
		unmap_single = 0;
		dma_addr = pci_map_page(pci_dev, page, page_offset, len,
					PCI_DMA_TODEVICE);
	}

	/* Transfer ownership of the skb to the final buffer */
	buffer->skb = skb;
	buffer->continuation = 0;

	/* Pass off to hardware */
	falcon_push_buffers(tx_queue);

	return NETDEV_TX_OK;

 pci_err:
	EFX_ERR_RL(efx, "TX queue %d could not map skb with %d bytes %d "
		   "fragments for DMA\n", tx_queue->queue, skb->len,
		   skb_shinfo(skb)->nr_frags + 1);

	/* Mark the packet as transmitted, and free the SKB ourselves */
	dev_kfree_skb_any((struct sk_buff *)skb);
	goto unwind;

 stop:
	rc = NETDEV_TX_BUSY;

	if (tx_queue->stopped == 1)
		efx_stop_queue(efx);

 unwind:
	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != tx_queue->write_count) {
		--tx_queue->insert_count;
		insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
		buffer = &tx_queue->buffer[insert_ptr];
		efx_dequeue_buffer(tx_queue, buffer);
		buffer->len = 0;
	}

	/* Free the fragment we were mid-way through pushing, unmapping
	 * it the same way it was mapped */
	if (unmap_len) {
		if (unmap_single)
			pci_unmap_single(pci_dev, unmap_addr, unmap_len,
					 PCI_DMA_TODEVICE);
		else
			pci_unmap_page(pci_dev, unmap_addr, unmap_len,
				       PCI_DMA_TODEVICE);
	}

	return rc;
}

/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
				       unsigned int index)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int stop_index, read_ptr;
	unsigned int mask = tx_queue->efx->type->txd_ring_mask;

	stop_index = (index + 1) & mask;
	read_ptr = tx_queue->read_count & mask;

	while (read_ptr != stop_index) {
		struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
		if (unlikely(buffer->len == 0)) {
			EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
				"completion id %x\n", tx_queue->queue,
				read_ptr);
			efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
			return;
		}

		efx_dequeue_buffer(tx_queue, buffer);
		buffer->continuation = 1;
		buffer->len = 0;

		++tx_queue->read_count;
		read_ptr = tx_queue->read_count & mask;
	}
}
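/* The (index + 1) & mask arithmetic handles ring wraparound: e.g. with
 * a mask of 0xfff (illustrative value), a completion at index 0xfff
 * gives stop_index 0, so the loop drains the last ring entry and then
 * stops at the start of the ring.
 */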

/* Initiate a packet transmission on the specified TX queue.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 *
 * This function is split out from efx_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.  It is
 * therefore a non-static inline, so as not to penalise performance
 * for non-loopback transmissions.
 *
 * Context: netif_tx_lock held
 */
inline int efx_xmit(struct efx_nic *efx,
		    struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
	int rc;

	/* Map fragments for DMA and add to TX queue */
	rc = efx_enqueue_skb(tx_queue, skb);
	if (unlikely(rc != NETDEV_TX_OK))
		goto out;

	/* Update last TX timer */
	efx->net_dev->trans_start = jiffies;

 out:
	return rc;
}

/* Initiate a packet transmission.  We use one channel per CPU
 * (sharing when we have more CPUs than channels).  On Falcon, the TX
 * completion events will be directed back to the CPU that transmitted
 * the packet, which should be cache-efficient.
 *
 * Context: non-blocking.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 */
int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
{
	struct efx_nic *efx = net_dev->priv;
	return efx_xmit(efx, &efx->tx_queue[0], skb);
}
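/* Note that normal transmissions always use tx_queue[0]; the per-CPU
 * distribution described above appears to refer to event channels and
 * completion processing rather than to the choice of TX queue here.
 */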

void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
	unsigned fill_level;
	struct efx_nic *efx = tx_queue->efx;

	EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask);

	efx_dequeue_buffers(tx_queue, index);

	/* See if we need to restart the netif queue.  This barrier
	 * separates the update of read_count from the test of
	 * stopped. */
	smp_mb();
	if (unlikely(tx_queue->stopped)) {
		fill_level = tx_queue->insert_count - tx_queue->read_count;
		if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) {
			EFX_BUG_ON_PARANOID(!NET_DEV_REGISTERED(efx));

			/* Do this under netif_tx_lock(), to avoid racing
			 * with efx_xmit(). */
			netif_tx_lock(efx->net_dev);
			if (tx_queue->stopped) {
				tx_queue->stopped = 0;
				efx_wake_queue(efx);
			}
			netif_tx_unlock(efx->net_dev);
		}
	}
}
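/* tx_queue->stopped is also manipulated by efx_enqueue_skb(), which
 * runs with netif_tx_lock held; re-checking the flag under that lock
 * before clearing it ensures the queue is woken exactly once per stop
 * and keeps efx->netif_stop_count balanced.
 */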

int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int txq_size;
	int i, rc;

	EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);

	/* Allocate software ring */
	txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer);
	tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
	if (!tx_queue->buffer) {
		rc = -ENOMEM;
		goto fail1;
	}
	for (i = 0; i <= efx->type->txd_ring_mask; ++i)
		tx_queue->buffer[i].continuation = 1;

	/* Allocate hardware ring */
	rc = falcon_probe_tx(tx_queue);
	if (rc)
		goto fail2;

	return 0;

 fail2:
	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
 fail1:
	tx_queue->used = 0;

	return rc;
}
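/* For a txd_ring_mask of 0xfff (an illustrative value), this allocates
 * 4096 * sizeof(struct efx_tx_buffer) bytes, zeroed, with every
 * entry's continuation flag set to 1; efx_enqueue_skb() clears that
 * flag only on a packet's final descriptor.
 */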

int efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);

	tx_queue->insert_count = 0;
	tx_queue->write_count = 0;
	tx_queue->read_count = 0;
	tx_queue->old_read_count = 0;
	BUG_ON(tx_queue->stopped);

	/* Set up TX descriptor ring */
	return falcon_init_tx(tx_queue);
}

void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;

	if (!tx_queue->buffer)
		return;

	/* Free any buffers left in the ring */
	while (tx_queue->read_count != tx_queue->write_count) {
		buffer = &tx_queue->buffer[tx_queue->read_count &
					   tx_queue->efx->type->txd_ring_mask];
		efx_dequeue_buffer(tx_queue, buffer);
		buffer->continuation = 1;
		buffer->len = 0;

		++tx_queue->read_count;
	}
}

void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);

	/* Flush TX queue, remove descriptor ring */
	falcon_fini_tx(tx_queue);

	efx_release_tx_buffers(tx_queue);

	/* Release queue's stop on port, if any */
	if (tx_queue->stopped) {
		tx_queue->stopped = 0;
		efx_wake_queue(tx_queue->efx);
	}
}

void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
	falcon_remove_tx(tx_queue);

	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
	tx_queue->used = 0;
}