drivers/net/ethernet/intel/i40e/i40e_txrx.c
1/*******************************************************************************
2 *
3 * Intel Ethernet Controller XL710 Family Linux Driver
4 * Copyright(c) 2013 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * The full GNU General Public License is included in this distribution in
20 * the file called "COPYING".
21 *
22 * Contact Information:
23 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25 *
26 ******************************************************************************/
27
28#include "i40e.h"
29
30static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
31 u32 td_tag)
32{
33 return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
34 ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
35 ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
36 ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
37 ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
38}
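/* All four fields are packed into the data descriptor's second quadword; for
 * example, build_ctob(I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS, 0, 256, 0)
 * produces a DTYPE_DATA qword that marks end-of-packet, requests a completion
 * write-back and advertises a 256-byte buffer.
 */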
39
40/**
41 * i40e_program_fdir_filter - Program a Flow Director filter
 42 * @fdir_data: Packet data that will be filter parameters
43 * @pf: The pf pointer
44 * @add: True for add/update, False for remove
45 **/
46int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
47 struct i40e_pf *pf, bool add)
48{
49 struct i40e_filter_program_desc *fdir_desc;
50 struct i40e_tx_buffer *tx_buf;
51 struct i40e_tx_desc *tx_desc;
52 struct i40e_ring *tx_ring;
53 struct i40e_vsi *vsi;
54 struct device *dev;
55 dma_addr_t dma;
56 u32 td_cmd = 0;
57 u16 i;
58
59 /* find existing FDIR VSI */
60 vsi = NULL;
61 for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
62 if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
63 vsi = pf->vsi[i];
64 if (!vsi)
65 return -ENOENT;
66
 67 tx_ring = vsi->tx_rings[0];
68 dev = tx_ring->dev;
69
70 dma = dma_map_single(dev, fdir_data->raw_packet,
71 I40E_FDIR_MAX_RAW_PACKET_LOOKUP, DMA_TO_DEVICE);
72 if (dma_mapping_error(dev, dma))
73 goto dma_fail;
74
75 /* grab the next descriptor */
76 i = tx_ring->next_to_use;
77 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
78 tx_buf = &tx_ring->tx_bi[i];
79
80 i++;
81 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
82
83 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32((fdir_data->q_index
84 << I40E_TXD_FLTR_QW0_QINDEX_SHIFT)
85 & I40E_TXD_FLTR_QW0_QINDEX_MASK);
86
87 fdir_desc->qindex_flex_ptype_vsi |= cpu_to_le32((fdir_data->flex_off
88 << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT)
89 & I40E_TXD_FLTR_QW0_FLEXOFF_MASK);
90
91 fdir_desc->qindex_flex_ptype_vsi |= cpu_to_le32((fdir_data->pctype
92 << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT)
93 & I40E_TXD_FLTR_QW0_PCTYPE_MASK);
94
95 /* Use LAN VSI Id if not programmed by user */
96 if (fdir_data->dest_vsi == 0)
97 fdir_desc->qindex_flex_ptype_vsi |=
98 cpu_to_le32((pf->vsi[pf->lan_vsi]->id)
99 << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);
100 else
101 fdir_desc->qindex_flex_ptype_vsi |=
102 cpu_to_le32((fdir_data->dest_vsi
103 << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT)
104 & I40E_TXD_FLTR_QW0_DEST_VSI_MASK);
105
106 fdir_desc->dtype_cmd_cntindex =
107 cpu_to_le32(I40E_TX_DESC_DTYPE_FILTER_PROG);
108
109 if (add)
110 fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
111 I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE
112 << I40E_TXD_FLTR_QW1_PCMD_SHIFT);
113 else
114 fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
115 I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE
116 << I40E_TXD_FLTR_QW1_PCMD_SHIFT);
117
118 fdir_desc->dtype_cmd_cntindex |= cpu_to_le32((fdir_data->dest_ctl
119 << I40E_TXD_FLTR_QW1_DEST_SHIFT)
120 & I40E_TXD_FLTR_QW1_DEST_MASK);
121
122 fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
123 (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT)
124 & I40E_TXD_FLTR_QW1_FD_STATUS_MASK);
125
126 if (fdir_data->cnt_index != 0) {
127 fdir_desc->dtype_cmd_cntindex |=
128 cpu_to_le32(I40E_TXD_FLTR_QW1_CNT_ENA_MASK);
129 fdir_desc->dtype_cmd_cntindex |=
130 cpu_to_le32((fdir_data->cnt_index
131 << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT)
132 & I40E_TXD_FLTR_QW1_CNTINDEX_MASK);
133 }
134
135 fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
136
137 /* Now program a dummy descriptor */
138 i = tx_ring->next_to_use;
139 tx_desc = I40E_TX_DESC(tx_ring, i);
140
141 i++;
142 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
143
144 tx_desc->buffer_addr = cpu_to_le64(dma);
145 td_cmd = I40E_TX_DESC_CMD_EOP |
146 I40E_TX_DESC_CMD_RS |
147 I40E_TX_DESC_CMD_DUMMY;
148
149 tx_desc->cmd_type_offset_bsz =
150 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0);
151
152 /* Force memory writes to complete before letting h/w
153 * know there are new descriptors to fetch. (Only
154 * applicable for weak-ordered memory model archs,
155 * such as IA-64).
156 */
157 wmb();
158
159 /* Mark the data descriptor to be watched */
160 tx_buf->next_to_watch = tx_desc;
161
162 writel(tx_ring->next_to_use, tx_ring->tail);
163 return 0;
164
165dma_fail:
166 return -1;
167}
168
169/**
170 * i40e_fd_handle_status - check the Programming Status for FD
171 * @rx_ring: the Rx ring for this descriptor
172 * @qw: the descriptor data
173 * @prog_id: the id originally used for programming
174 *
 175 * This is used to verify whether the FD programming or invalidation
 176 * requested of the HW by SW was successful, and to take action accordingly.
177 **/
178static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u32 qw, u8 prog_id)
179{
180 struct pci_dev *pdev = rx_ring->vsi->back->pdev;
181 u32 error;
182
183 error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
184 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
185
186 /* for now just print the Status */
187 dev_info(&pdev->dev, "FD programming id %02x, Status %08x\n",
188 prog_id, error);
189}
190
191/**
 192 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
193 * @ring: the ring that owns the buffer
194 * @tx_buffer: the buffer to free
195 **/
196static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
197 struct i40e_tx_buffer *tx_buffer)
 198{
199 if (tx_buffer->skb) {
200 dev_kfree_skb_any(tx_buffer->skb);
201 if (dma_unmap_len(tx_buffer, len))
 202 dma_unmap_single(ring->dev,
203 dma_unmap_addr(tx_buffer, dma),
204 dma_unmap_len(tx_buffer, len),
 205 DMA_TO_DEVICE);
206 } else if (dma_unmap_len(tx_buffer, len)) {
207 dma_unmap_page(ring->dev,
208 dma_unmap_addr(tx_buffer, dma),
209 dma_unmap_len(tx_buffer, len),
210 DMA_TO_DEVICE);
 211 }
212 tx_buffer->next_to_watch = NULL;
213 tx_buffer->skb = NULL;
 214 dma_unmap_len_set(tx_buffer, len, 0);
 215 /* tx_buffer must be completely set up in the transmit path */
216}
217
218/**
 219 * i40e_clean_tx_ring - Free any Tx buffers
220 * @tx_ring: ring to be cleaned
221 **/
222void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
223{
224 unsigned long bi_size;
225 u16 i;
226
227 /* ring already cleared, nothing to do */
228 if (!tx_ring->tx_bi)
229 return;
230
231 /* Free all the Tx ring sk_buffs */
232 for (i = 0; i < tx_ring->count; i++)
233 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
234
235 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
236 memset(tx_ring->tx_bi, 0, bi_size);
237
238 /* Zero out the descriptor ring */
239 memset(tx_ring->desc, 0, tx_ring->size);
240
241 tx_ring->next_to_use = 0;
242 tx_ring->next_to_clean = 0;
243
244 if (!tx_ring->netdev)
245 return;
246
247 /* cleanup Tx queue statistics */
248 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
249 tx_ring->queue_index));
250}
251
252/**
253 * i40e_free_tx_resources - Free Tx resources per queue
254 * @tx_ring: Tx descriptor ring for a specific queue
255 *
256 * Free all transmit software resources
257 **/
258void i40e_free_tx_resources(struct i40e_ring *tx_ring)
259{
260 i40e_clean_tx_ring(tx_ring);
261 kfree(tx_ring->tx_bi);
262 tx_ring->tx_bi = NULL;
263
264 if (tx_ring->desc) {
265 dma_free_coherent(tx_ring->dev, tx_ring->size,
266 tx_ring->desc, tx_ring->dma);
267 tx_ring->desc = NULL;
268 }
269}
270
271/**
272 * i40e_get_tx_pending - how many tx descriptors not processed
 273 * @ring: the ring of descriptors
274 *
275 * Since there is no access to the ring head register
276 * in XL710, we need to use our local copies
277 **/
278static u32 i40e_get_tx_pending(struct i40e_ring *ring)
279{
280 u32 ntu = ((ring->next_to_clean <= ring->next_to_use)
281 ? ring->next_to_use
282 : ring->next_to_use + ring->count);
283 return ntu - ring->next_to_clean;
284}
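/* Example: with a 512-descriptor ring, next_to_clean = 500 and
 * next_to_use = 10 gives (10 + 512) - 500 = 22 descriptors still pending;
 * with next_to_use = 510 it is simply 510 - 500 = 10.
 */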
285
286/**
287 * i40e_check_tx_hang - Is there a hang in the Tx queue
288 * @tx_ring: the ring of descriptors
289 **/
290static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
291{
292 u32 tx_pending = i40e_get_tx_pending(tx_ring);
293 bool ret = false;
294
295 clear_check_for_tx_hang(tx_ring);
296
297 /* Check for a hung queue, but be thorough. This verifies
298 * that a transmit has been completed since the previous
299 * check AND there is at least one packet pending. The
300 * ARMED bit is set to indicate a potential hang. The
301 * bit is cleared if a pause frame is received to remove
302 * false hang detection due to PFC or 802.3x frames. By
303 * requiring this to fail twice we avoid races with
304 * PFC clearing the ARMED bit and conditions where we
305 * run the check_tx_hang logic with a transmit completion
306 * pending but without time to complete it yet.
307 */
 308 if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
309 tx_pending) {
310 /* make sure it is true for two checks in a row */
311 ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
312 &tx_ring->state);
313 } else {
314 /* update completed stats and disarm the hang check */
 315 tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets;
316 clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
317 }
318
319 return ret;
320}
321
322/**
323 * i40e_clean_tx_irq - Reclaim resources after transmit completes
324 * @tx_ring: tx ring to clean
325 * @budget: how many cleans we're allowed
326 *
 327 * Returns true if there's any budget left (i.e. the clean is finished)
328 **/
329static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
330{
331 u16 i = tx_ring->next_to_clean;
332 struct i40e_tx_buffer *tx_buf;
333 struct i40e_tx_desc *tx_desc;
334 unsigned int total_packets = 0;
335 unsigned int total_bytes = 0;
336
337 tx_buf = &tx_ring->tx_bi[i];
338 tx_desc = I40E_TX_DESC(tx_ring, i);
 339 i -= tx_ring->count;
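	/* The index is biased negative by the ring count so the wrap test in
	 * the loop below reduces to a cheap !i check: i hits zero exactly when
	 * the end of the ring is crossed, at which point the count is
	 * subtracted again and the buffer/descriptor pointers are reset to the
	 * start of the ring.
	 */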
 340
341 do {
342 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
343
344 /* if next_to_watch is not set then there is no work pending */
345 if (!eop_desc)
346 break;
347
348 /* prevent any other reads prior to eop_desc */
349 read_barrier_depends();
350
351 /* if the descriptor isn't done, no work yet to do */
352 if (!(eop_desc->cmd_type_offset_bsz &
353 cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
354 break;
355
 356 /* clear next_to_watch to prevent false hangs */
 357 tx_buf->next_to_watch = NULL;
 358
359 /* update the statistics for this packet */
360 total_bytes += tx_buf->bytecount;
361 total_packets += tx_buf->gso_segs;
 362
363 /* free the skb */
364 dev_kfree_skb_any(tx_buf->skb);
 365
366 /* unmap skb header data */
367 dma_unmap_single(tx_ring->dev,
368 dma_unmap_addr(tx_buf, dma),
369 dma_unmap_len(tx_buf, len),
370 DMA_TO_DEVICE);
 371
372 /* clear tx_buffer data */
373 tx_buf->skb = NULL;
374 dma_unmap_len_set(tx_buf, len, 0);
 375
376 /* unmap remaining buffers */
377 while (tx_desc != eop_desc) {
378
379 tx_buf++;
380 tx_desc++;
381 i++;
382 if (unlikely(!i)) {
383 i -= tx_ring->count;
384 tx_buf = tx_ring->tx_bi;
385 tx_desc = I40E_TX_DESC(tx_ring, 0);
386 }
 387
388 /* unmap any remaining paged data */
389 if (dma_unmap_len(tx_buf, len)) {
390 dma_unmap_page(tx_ring->dev,
391 dma_unmap_addr(tx_buf, dma),
392 dma_unmap_len(tx_buf, len),
393 DMA_TO_DEVICE);
394 dma_unmap_len_set(tx_buf, len, 0);
395 }
396 }
397
398 /* move us one more past the eop_desc for start of next pkt */
399 tx_buf++;
400 tx_desc++;
401 i++;
402 if (unlikely(!i)) {
403 i -= tx_ring->count;
404 tx_buf = tx_ring->tx_bi;
405 tx_desc = I40E_TX_DESC(tx_ring, 0);
406 }
407
408 /* update budget accounting */
409 budget--;
410 } while (likely(budget));
411
412 i += tx_ring->count;
 413 tx_ring->next_to_clean = i;
414 tx_ring->stats.bytes += total_bytes;
415 tx_ring->stats.packets += total_packets;
416 tx_ring->q_vector->tx.total_bytes += total_bytes;
417 tx_ring->q_vector->tx.total_packets += total_packets;
 418
419 if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
420 /* schedule immediate reset if we believe we hung */
421 dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
422 " VSI <%d>\n"
423 " Tx Queue <%d>\n"
424 " next_to_use <%x>\n"
425 " next_to_clean <%x>\n",
426 tx_ring->vsi->seid,
427 tx_ring->queue_index,
428 tx_ring->next_to_use, i);
429 dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n"
430 " time_stamp <%lx>\n"
431 " jiffies <%lx>\n",
432 tx_ring->tx_bi[i].time_stamp, jiffies);
433
434 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
435
436 dev_info(tx_ring->dev,
437 "tx hang detected on queue %d, resetting adapter\n",
438 tx_ring->queue_index);
439
440 tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);
441
442 /* the adapter is about to reset, no point in enabling stuff */
443 return true;
444 }
445
446 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
447 tx_ring->queue_index),
448 total_packets, total_bytes);
449
450#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
451 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
452 (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
453 /* Make sure that anybody stopping the queue after this
454 * sees the new next_to_clean.
455 */
456 smp_mb();
457 if (__netif_subqueue_stopped(tx_ring->netdev,
458 tx_ring->queue_index) &&
459 !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
460 netif_wake_subqueue(tx_ring->netdev,
461 tx_ring->queue_index);
462 ++tx_ring->tx_stats.restart_queue;
463 }
464 }
465
466 return budget > 0;
467}
468
469/**
470 * i40e_set_new_dynamic_itr - Find new ITR level
471 * @rc: structure containing ring performance data
472 *
473 * Stores a new ITR value based on packets and byte counts during
474 * the last interrupt. The advantage of per interrupt computation
475 * is faster updates and more accurate ITR for the current traffic
476 * pattern. Constants in this function were computed based on
477 * theoretical maximum wire speed and thresholds were set based on
478 * testing data as well as attempting to minimize response time
479 * while increasing bulk throughput.
480 **/
481static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
482{
483 enum i40e_latency_range new_latency_range = rc->latency_range;
484 u32 new_itr = rc->itr;
485 int bytes_per_int;
486
487 if (rc->total_packets == 0 || !rc->itr)
488 return;
489
490 /* simple throttlerate management
491 * 0-10MB/s lowest (100000 ints/s)
492 * 10-20MB/s low (20000 ints/s)
493 * 20-1249MB/s bulk (8000 ints/s)
494 */
495 bytes_per_int = rc->total_bytes / rc->itr;
 496 switch (new_latency_range) {
497 case I40E_LOWEST_LATENCY:
498 if (bytes_per_int > 10)
499 new_latency_range = I40E_LOW_LATENCY;
500 break;
501 case I40E_LOW_LATENCY:
502 if (bytes_per_int > 20)
503 new_latency_range = I40E_BULK_LATENCY;
504 else if (bytes_per_int <= 10)
505 new_latency_range = I40E_LOWEST_LATENCY;
506 break;
507 case I40E_BULK_LATENCY:
508 if (bytes_per_int <= 20)
 509 new_latency_range = I40E_LOW_LATENCY;
510 break;
511 }
512
513 switch (new_latency_range) {
514 case I40E_LOWEST_LATENCY:
515 new_itr = I40E_ITR_100K;
516 break;
517 case I40E_LOW_LATENCY:
518 new_itr = I40E_ITR_20K;
519 break;
520 case I40E_BULK_LATENCY:
521 new_itr = I40E_ITR_8K;
522 break;
523 default:
524 break;
525 }
526
527 if (new_itr != rc->itr) {
528 /* do an exponential smoothing */
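		/* Expressed as interrupt rates (rate ~ 1/itr), the blend below
		 * is 1/new = 0.9 * (1/old) + 0.1 * (1/target), i.e. the rate
		 * moves roughly 10% of the way toward the newly selected
		 * target on each adjustment rather than jumping straight
		 * to it.
		 */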
529 new_itr = (10 * new_itr * rc->itr) /
530 ((9 * new_itr) + rc->itr);
531 rc->itr = new_itr & I40E_MAX_ITR;
532 }
533
534 rc->total_bytes = 0;
535 rc->total_packets = 0;
536}
537
538/**
539 * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
540 * @q_vector: the vector to adjust
541 **/
542static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
543{
544 u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
545 struct i40e_hw *hw = &q_vector->vsi->back->hw;
546 u32 reg_addr;
547 u16 old_itr;
548
549 reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
550 old_itr = q_vector->rx.itr;
551 i40e_set_new_dynamic_itr(&q_vector->rx);
552 if (old_itr != q_vector->rx.itr)
553 wr32(hw, reg_addr, q_vector->rx.itr);
554
555 reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
556 old_itr = q_vector->tx.itr;
557 i40e_set_new_dynamic_itr(&q_vector->tx);
558 if (old_itr != q_vector->tx.itr)
559 wr32(hw, reg_addr, q_vector->tx.itr);
560
561 i40e_flush(hw);
562}
563
564/**
565 * i40e_clean_programming_status - clean the programming status descriptor
566 * @rx_ring: the rx ring that has this descriptor
567 * @rx_desc: the rx descriptor written back by HW
568 *
569 * Flow director should handle FD_FILTER_STATUS to check its filter programming
570 * status being successful or not and take actions accordingly. FCoE should
571 * handle its context/filter programming/invalidation status and take actions.
572 *
573 **/
574static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
575 union i40e_rx_desc *rx_desc)
576{
577 u64 qw;
578 u8 id;
579
580 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
581 id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
582 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
583
584 if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
585 i40e_fd_handle_status(rx_ring, qw, id);
586}
587
588/**
589 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
590 * @tx_ring: the tx ring to set up
591 *
592 * Return 0 on success, negative on error
593 **/
594int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
595{
596 struct device *dev = tx_ring->dev;
597 int bi_size;
598
599 if (!dev)
600 return -ENOMEM;
601
602 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
603 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
604 if (!tx_ring->tx_bi)
605 goto err;
606
607 /* round up to nearest 4K */
608 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
609 tx_ring->size = ALIGN(tx_ring->size, 4096);
610 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
611 &tx_ring->dma, GFP_KERNEL);
612 if (!tx_ring->desc) {
613 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
614 tx_ring->size);
615 goto err;
616 }
617
618 tx_ring->next_to_use = 0;
619 tx_ring->next_to_clean = 0;
620 return 0;
621
622err:
623 kfree(tx_ring->tx_bi);
624 tx_ring->tx_bi = NULL;
625 return -ENOMEM;
626}
627
628/**
629 * i40e_clean_rx_ring - Free Rx buffers
630 * @rx_ring: ring to be cleaned
631 **/
632void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
633{
634 struct device *dev = rx_ring->dev;
635 struct i40e_rx_buffer *rx_bi;
636 unsigned long bi_size;
637 u16 i;
638
639 /* ring already cleared, nothing to do */
640 if (!rx_ring->rx_bi)
641 return;
642
643 /* Free all the Rx ring sk_buffs */
644 for (i = 0; i < rx_ring->count; i++) {
645 rx_bi = &rx_ring->rx_bi[i];
646 if (rx_bi->dma) {
647 dma_unmap_single(dev,
648 rx_bi->dma,
649 rx_ring->rx_buf_len,
650 DMA_FROM_DEVICE);
651 rx_bi->dma = 0;
652 }
653 if (rx_bi->skb) {
654 dev_kfree_skb(rx_bi->skb);
655 rx_bi->skb = NULL;
656 }
657 if (rx_bi->page) {
658 if (rx_bi->page_dma) {
659 dma_unmap_page(dev,
660 rx_bi->page_dma,
661 PAGE_SIZE / 2,
662 DMA_FROM_DEVICE);
663 rx_bi->page_dma = 0;
664 }
665 __free_page(rx_bi->page);
666 rx_bi->page = NULL;
667 rx_bi->page_offset = 0;
668 }
669 }
670
671 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
672 memset(rx_ring->rx_bi, 0, bi_size);
673
674 /* Zero out the descriptor ring */
675 memset(rx_ring->desc, 0, rx_ring->size);
676
677 rx_ring->next_to_clean = 0;
678 rx_ring->next_to_use = 0;
679}
680
681/**
682 * i40e_free_rx_resources - Free Rx resources
683 * @rx_ring: ring to clean the resources from
684 *
685 * Free all receive software resources
686 **/
687void i40e_free_rx_resources(struct i40e_ring *rx_ring)
688{
689 i40e_clean_rx_ring(rx_ring);
690 kfree(rx_ring->rx_bi);
691 rx_ring->rx_bi = NULL;
692
693 if (rx_ring->desc) {
694 dma_free_coherent(rx_ring->dev, rx_ring->size,
695 rx_ring->desc, rx_ring->dma);
696 rx_ring->desc = NULL;
697 }
698}
699
700/**
701 * i40e_setup_rx_descriptors - Allocate Rx descriptors
702 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
703 *
704 * Returns 0 on success, negative on failure
705 **/
706int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
707{
708 struct device *dev = rx_ring->dev;
709 int bi_size;
710
711 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
712 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
713 if (!rx_ring->rx_bi)
714 goto err;
715
716 /* Round up to nearest 4K */
717 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
718 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
719 : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
720 rx_ring->size = ALIGN(rx_ring->size, 4096);
721 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
722 &rx_ring->dma, GFP_KERNEL);
723
724 if (!rx_ring->desc) {
725 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
726 rx_ring->size);
727 goto err;
728 }
729
730 rx_ring->next_to_clean = 0;
731 rx_ring->next_to_use = 0;
732
733 return 0;
734err:
735 kfree(rx_ring->rx_bi);
736 rx_ring->rx_bi = NULL;
737 return -ENOMEM;
738}
739
740/**
741 * i40e_release_rx_desc - Store the new tail and head values
742 * @rx_ring: ring to bump
743 * @val: new head index
744 **/
745static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
746{
747 rx_ring->next_to_use = val;
748 /* Force memory writes to complete before letting h/w
749 * know there are new descriptors to fetch. (Only
750 * applicable for weak-ordered memory model archs,
751 * such as IA-64).
752 */
753 wmb();
754 writel(val, rx_ring->tail);
755}
756
757/**
758 * i40e_alloc_rx_buffers - Replace used receive buffers; packet split
759 * @rx_ring: ring to place buffers on
760 * @cleaned_count: number of buffers to replace
761 **/
762void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
763{
764 u16 i = rx_ring->next_to_use;
765 union i40e_rx_desc *rx_desc;
766 struct i40e_rx_buffer *bi;
767 struct sk_buff *skb;
768
769 /* do nothing if no valid netdev defined */
770 if (!rx_ring->netdev || !cleaned_count)
771 return;
772
773 while (cleaned_count--) {
774 rx_desc = I40E_RX_DESC(rx_ring, i);
775 bi = &rx_ring->rx_bi[i];
776 skb = bi->skb;
777
778 if (!skb) {
779 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
780 rx_ring->rx_buf_len);
781 if (!skb) {
782 rx_ring->rx_stats.alloc_rx_buff_failed++;
783 goto no_buffers;
784 }
785 /* initialize queue mapping */
786 skb_record_rx_queue(skb, rx_ring->queue_index);
787 bi->skb = skb;
788 }
789
790 if (!bi->dma) {
791 bi->dma = dma_map_single(rx_ring->dev,
792 skb->data,
793 rx_ring->rx_buf_len,
794 DMA_FROM_DEVICE);
795 if (dma_mapping_error(rx_ring->dev, bi->dma)) {
796 rx_ring->rx_stats.alloc_rx_buff_failed++;
797 bi->dma = 0;
798 goto no_buffers;
799 }
800 }
801
802 if (ring_is_ps_enabled(rx_ring)) {
803 if (!bi->page) {
804 bi->page = alloc_page(GFP_ATOMIC);
805 if (!bi->page) {
806 rx_ring->rx_stats.alloc_rx_page_failed++;
807 goto no_buffers;
808 }
809 }
810
811 if (!bi->page_dma) {
812 /* use a half page if we're re-using */
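			/* Each page is treated as two half-page buffers; the
			 * XOR below flips page_offset between 0 and
			 * PAGE_SIZE/2 so successive mappings alternate
			 * between the two halves.
			 */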
813 bi->page_offset ^= PAGE_SIZE / 2;
814 bi->page_dma = dma_map_page(rx_ring->dev,
815 bi->page,
816 bi->page_offset,
817 PAGE_SIZE / 2,
818 DMA_FROM_DEVICE);
819 if (dma_mapping_error(rx_ring->dev,
820 bi->page_dma)) {
821 rx_ring->rx_stats.alloc_rx_page_failed++;
822 bi->page_dma = 0;
823 goto no_buffers;
824 }
825 }
826
827 /* Refresh the desc even if buffer_addrs didn't change
828 * because each write-back erases this info.
829 */
830 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
831 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
832 } else {
833 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
834 rx_desc->read.hdr_addr = 0;
835 }
836 i++;
837 if (i == rx_ring->count)
838 i = 0;
839 }
840
841no_buffers:
842 if (rx_ring->next_to_use != i)
843 i40e_release_rx_desc(rx_ring, i);
844}
845
846/**
847 * i40e_receive_skb - Send a completed packet up the stack
848 * @rx_ring: rx ring in play
849 * @skb: packet to send up
850 * @vlan_tag: vlan tag for packet
851 **/
852static void i40e_receive_skb(struct i40e_ring *rx_ring,
853 struct sk_buff *skb, u16 vlan_tag)
854{
855 struct i40e_q_vector *q_vector = rx_ring->q_vector;
856 struct i40e_vsi *vsi = rx_ring->vsi;
857 u64 flags = vsi->back->flags;
858
859 if (vlan_tag & VLAN_VID_MASK)
860 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
861
862 if (flags & I40E_FLAG_IN_NETPOLL)
863 netif_rx(skb);
864 else
865 napi_gro_receive(&q_vector->napi, skb);
866}
867
868/**
869 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
870 * @vsi: the VSI we care about
871 * @skb: skb currently being received and modified
872 * @rx_status: status value of last descriptor in packet
873 * @rx_error: error value of last descriptor in packet
874 **/
875static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
876 struct sk_buff *skb,
877 u32 rx_status,
878 u32 rx_error)
879{
880 skb->ip_summed = CHECKSUM_NONE;
881
882 /* Rx csum enabled and ip headers found? */
883 if (!(vsi->netdev->features & NETIF_F_RXCSUM &&
884 rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
885 return;
886
887 /* IP or L4 checksum error */
888 if (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
889 (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
890 vsi->back->hw_csum_rx_error++;
891 return;
892 }
893
894 skb->ip_summed = CHECKSUM_UNNECESSARY;
895}
896
897/**
898 * i40e_rx_hash - returns the hash value from the Rx descriptor
899 * @ring: descriptor ring
900 * @rx_desc: specific descriptor
901 **/
902static inline u32 i40e_rx_hash(struct i40e_ring *ring,
903 union i40e_rx_desc *rx_desc)
904{
905 if (ring->netdev->features & NETIF_F_RXHASH) {
906 if ((le64_to_cpu(rx_desc->wb.qword1.status_error_len) >>
907 I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) &
908 I40E_RX_DESC_FLTSTAT_RSS_HASH)
909 return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
910 }
911 return 0;
912}
913
914/**
915 * i40e_clean_rx_irq - Reclaim resources after receive completes
916 * @rx_ring: rx ring to clean
917 * @budget: how many cleans we're allowed
918 *
 919 * Returns true if there's any budget left (i.e. the clean is finished)
920 **/
921static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
922{
923 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
924 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
925 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
926 const int current_node = numa_node_id();
927 struct i40e_vsi *vsi = rx_ring->vsi;
928 u16 i = rx_ring->next_to_clean;
929 union i40e_rx_desc *rx_desc;
930 u32 rx_error, rx_status;
931 u64 qword;
932
933 rx_desc = I40E_RX_DESC(rx_ring, i);
934 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
935 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK)
936 >> I40E_RXD_QW1_STATUS_SHIFT;
937
938 while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) {
939 union i40e_rx_desc *next_rxd;
940 struct i40e_rx_buffer *rx_bi;
941 struct sk_buff *skb;
942 u16 vlan_tag;
943 if (i40e_rx_is_programming_status(qword)) {
944 i40e_clean_programming_status(rx_ring, rx_desc);
945 I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
946 goto next_desc;
947 }
948 rx_bi = &rx_ring->rx_bi[i];
949 skb = rx_bi->skb;
950 prefetch(skb->data);
951
952 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
953 >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
954 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
955 >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
956 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK)
957 >> I40E_RXD_QW1_LENGTH_SPH_SHIFT;
958
959 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK)
960 >> I40E_RXD_QW1_ERROR_SHIFT;
961 rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
962 rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
963
964 rx_bi->skb = NULL;
965
966 /* This memory barrier is needed to keep us from reading
967 * any other fields out of the rx_desc until we know the
968 * STATUS_DD bit is set
969 */
970 rmb();
971
972 /* Get the header and possibly the whole packet
973 * If this is an skb from previous receive dma will be 0
974 */
975 if (rx_bi->dma) {
976 u16 len;
977
978 if (rx_hbo)
979 len = I40E_RX_HDR_SIZE;
980 else if (rx_sph)
981 len = rx_header_len;
982 else if (rx_packet_len)
983 len = rx_packet_len; /* 1buf/no split found */
984 else
985 len = rx_header_len; /* split always mode */
986
987 skb_put(skb, len);
988 dma_unmap_single(rx_ring->dev,
989 rx_bi->dma,
990 rx_ring->rx_buf_len,
991 DMA_FROM_DEVICE);
992 rx_bi->dma = 0;
993 }
994
995 /* Get the rest of the data if this was a header split */
996 if (ring_is_ps_enabled(rx_ring) && rx_packet_len) {
997
998 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
999 rx_bi->page,
1000 rx_bi->page_offset,
1001 rx_packet_len);
1002
1003 skb->len += rx_packet_len;
1004 skb->data_len += rx_packet_len;
1005 skb->truesize += rx_packet_len;
1006
1007 if ((page_count(rx_bi->page) == 1) &&
1008 (page_to_nid(rx_bi->page) == current_node))
1009 get_page(rx_bi->page);
1010 else
1011 rx_bi->page = NULL;
1012
1013 dma_unmap_page(rx_ring->dev,
1014 rx_bi->page_dma,
1015 PAGE_SIZE / 2,
1016 DMA_FROM_DEVICE);
1017 rx_bi->page_dma = 0;
1018 }
1019 I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
1020
1021 if (unlikely(
1022 !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1023 struct i40e_rx_buffer *next_buffer;
1024
1025 next_buffer = &rx_ring->rx_bi[i];
1026
1027 if (ring_is_ps_enabled(rx_ring)) {
1028 rx_bi->skb = next_buffer->skb;
1029 rx_bi->dma = next_buffer->dma;
1030 next_buffer->skb = skb;
1031 next_buffer->dma = 0;
1032 }
1033 rx_ring->rx_stats.non_eop_descs++;
1034 goto next_desc;
1035 }
1036
1037 /* ERR_MASK will only have valid bits if EOP set */
1038 if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1039 dev_kfree_skb_any(skb);
1040 goto next_desc;
1041 }
1042
1043 skb->rxhash = i40e_rx_hash(rx_ring, rx_desc);
1044 i40e_rx_checksum(vsi, skb, rx_status, rx_error);
1045
1046 /* probably a little skewed due to removing CRC */
1047 total_rx_bytes += skb->len;
1048 total_rx_packets++;
1049
1050 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1051 vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1052 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1053 : 0;
1054 i40e_receive_skb(rx_ring, skb, vlan_tag);
1055
1056 rx_ring->netdev->last_rx = jiffies;
1057 budget--;
1058next_desc:
1059 rx_desc->wb.qword1.status_error_len = 0;
1060 if (!budget)
1061 break;
1062
1063 cleaned_count++;
1064 /* return some buffers to hardware, one at a time is too slow */
1065 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1066 i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1067 cleaned_count = 0;
1068 }
1069
1070 /* use prefetched values */
1071 rx_desc = next_rxd;
1072 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1073 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK)
1074 >> I40E_RXD_QW1_STATUS_SHIFT;
1075 }
1076
1077 rx_ring->next_to_clean = i;
1078 rx_ring->stats.packets += total_rx_packets;
1079 rx_ring->stats.bytes += total_rx_bytes;
1080 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1081 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1082
1083 if (cleaned_count)
1084 i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1085
1086 return budget > 0;
1087}
1088
1089/**
1090 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1091 * @napi: napi struct with our devices info in it
1092 * @budget: amount of work driver is allowed to do this pass, in packets
1093 *
1094 * This function will clean all queues associated with a q_vector.
1095 *
1096 * Returns the amount of work done
1097 **/
1098int i40e_napi_poll(struct napi_struct *napi, int budget)
1099{
1100 struct i40e_q_vector *q_vector =
1101 container_of(napi, struct i40e_q_vector, napi);
1102 struct i40e_vsi *vsi = q_vector->vsi;
 1103 struct i40e_ring *ring;
1104 bool clean_complete = true;
1105 int budget_per_ring;
1106
1107 if (test_bit(__I40E_DOWN, &vsi->state)) {
1108 napi_complete(napi);
1109 return 0;
1110 }
1111
1112 /* Since the actual Tx work is minimal, we can give the Tx a larger
1113 * budget and be more aggressive about cleaning up the Tx descriptors.
1114 */
1115 i40e_for_each_ring(ring, q_vector->tx)
1116 clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1117
1118 /* We attempt to distribute budget to each Rx queue fairly, but don't
1119 * allow the budget to go below 1 because that would exit polling early.
1120 */
1121 budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1122
1123 i40e_for_each_ring(ring, q_vector->rx)
1124 clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
1125
1126 /* If work not completed, return budget and polling will return */
1127 if (!clean_complete)
1128 return budget;
1129
1130 /* Work is done so exit the polling mode and re-enable the interrupt */
1131 napi_complete(napi);
1132 if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
1133 ITR_IS_DYNAMIC(vsi->tx_itr_setting))
1134 i40e_update_dynamic_itr(q_vector);
1135
1136 if (!test_bit(__I40E_DOWN, &vsi->state)) {
1137 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1138 i40e_irq_dynamic_enable(vsi,
1139 q_vector->v_idx + vsi->base_vector);
1140 } else {
1141 struct i40e_hw *hw = &vsi->back->hw;
1142 /* We re-enable the queue 0 cause, but
1143 * don't worry about dynamic_enable
1144 * because we left it on for the other
1145 * possible interrupts during napi
1146 */
1147 u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
1148 qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1149 wr32(hw, I40E_QINT_RQCTL(0), qval);
1150
1151 qval = rd32(hw, I40E_QINT_TQCTL(0));
1152 qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1153 wr32(hw, I40E_QINT_TQCTL(0), qval);
1154 i40e_flush(hw);
1155 }
1156 }
1157
1158 return 0;
1159}
1160
1161/**
1162 * i40e_atr - Add a Flow Director ATR filter
1163 * @tx_ring: ring to add programming descriptor to
1164 * @skb: send buffer
1165 * @flags: send flags
1166 * @protocol: wire protocol
1167 **/
1168static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1169 u32 flags, __be16 protocol)
1170{
1171 struct i40e_filter_program_desc *fdir_desc;
1172 struct i40e_pf *pf = tx_ring->vsi->back;
1173 union {
1174 unsigned char *network;
1175 struct iphdr *ipv4;
1176 struct ipv6hdr *ipv6;
1177 } hdr;
1178 struct tcphdr *th;
1179 unsigned int hlen;
1180 u32 flex_ptype, dtype_cmd;
 1181 u16 i;
1182
1183 /* make sure ATR is enabled */
1184 if (!(pf->flags & I40E_FLAG_FDIR_ATR_ENABLED))
1185 return;
1186
1187 /* if sampling is disabled do nothing */
1188 if (!tx_ring->atr_sample_rate)
1189 return;
1190
1191 tx_ring->atr_count++;
1192
1193 /* snag network header to get L4 type and address */
1194 hdr.network = skb_network_header(skb);
1195
1196 /* Currently only IPv4/IPv6 with TCP is supported */
1197 if (protocol == htons(ETH_P_IP)) {
1198 if (hdr.ipv4->protocol != IPPROTO_TCP)
1199 return;
1200
1201 /* access ihl as a u8 to avoid unaligned access on ia64 */
1202 hlen = (hdr.network[0] & 0x0F) << 2;
1203 } else if (protocol == htons(ETH_P_IPV6)) {
1204 if (hdr.ipv6->nexthdr != IPPROTO_TCP)
1205 return;
1206
1207 hlen = sizeof(struct ipv6hdr);
1208 } else {
1209 return;
1210 }
1211
1212 th = (struct tcphdr *)(hdr.network + hlen);
1213
1214 /* sample on all syn/fin packets or once every atr sample rate */
1215 if (!th->fin && !th->syn && (tx_ring->atr_count < tx_ring->atr_sample_rate))
1216 return;
1217
1218 tx_ring->atr_count = 0;
1219
1220 /* grab the next descriptor */
1221 i = tx_ring->next_to_use;
1222 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
1223
1224 i++;
1225 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1226
1227 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
1228 I40E_TXD_FLTR_QW0_QINDEX_MASK;
1229 flex_ptype |= (protocol == htons(ETH_P_IP)) ?
1230 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
1231 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
1232 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
1233 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
1234
1235 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
1236
1237 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
1238
1239 dtype_cmd |= th->fin ?
1240 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
1241 I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
1242 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
1243 I40E_TXD_FLTR_QW1_PCMD_SHIFT);
1244
1245 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
1246 I40E_TXD_FLTR_QW1_DEST_SHIFT;
1247
1248 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
1249 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
1250
1251 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
1252 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
1253}
1254
1255#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
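/* EOP marks the last descriptor of a frame and RS asks the hardware to write
 * back a completion (DD) for that descriptor, which is what i40e_clean_tx_irq
 * polls for through next_to_watch.
 */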
1256/**
1257 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
1258 * @skb: send buffer
1259 * @tx_ring: ring to send buffer on
1260 * @flags: the tx flags to be set
1261 *
1262 * Checks the skb and set up correspondingly several generic transmit flags
1263 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
1264 *
 1265 * Returns an error code to indicate the frame should be dropped upon error,
 1266 * otherwise returns 0 to indicate the flags have been set properly.
1267 **/
1268static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
1269 struct i40e_ring *tx_ring,
1270 u32 *flags)
1271{
1272 __be16 protocol = skb->protocol;
1273 u32 tx_flags = 0;
1274
1275 /* if we have a HW VLAN tag being added, default to the HW one */
1276 if (vlan_tx_tag_present(skb)) {
1277 tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
1278 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
1279 /* else if it is a SW VLAN, check the next protocol and store the tag */
1280 } else if (protocol == __constant_htons(ETH_P_8021Q)) {
1281 struct vlan_hdr *vhdr, _vhdr;
1282 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
1283 if (!vhdr)
1284 return -EINVAL;
1285
1286 protocol = vhdr->h_vlan_encapsulated_proto;
1287 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
1288 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
1289 }
1290
1291 /* Insert 802.1p priority into VLAN header */
1292 if ((tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED) &&
1293 ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
1294 (skb->priority != TC_PRIO_CONTROL))) {
1295 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
1296 tx_flags |= (skb->priority & 0x7) <<
1297 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
1298 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
1299 struct vlan_ethhdr *vhdr;
1300 if (skb_header_cloned(skb) &&
1301 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1302 return -ENOMEM;
1303 vhdr = (struct vlan_ethhdr *)skb->data;
1304 vhdr->h_vlan_TCI = htons(tx_flags >>
1305 I40E_TX_FLAGS_VLAN_SHIFT);
1306 } else {
1307 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
1308 }
1309 }
1310 *flags = tx_flags;
1311 return 0;
1312}
1313
1314/**
1315 * i40e_tso - set up the tso context descriptor
1316 * @tx_ring: ptr to the ring to send
1317 * @skb: ptr to the skb we're sending
1318 * @tx_flags: the collected send information
1319 * @protocol: the send protocol
1320 * @hdr_len: ptr to the size of the packet header
1321 * @cd_tunneling: ptr to context descriptor bits
1322 *
1323 * Returns 0 if no TSO can happen, 1 if tso is going, or error
1324 **/
1325static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
1326 u32 tx_flags, __be16 protocol, u8 *hdr_len,
1327 u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling)
1328{
1329 u32 cd_cmd, cd_tso_len, cd_mss;
1330 struct tcphdr *tcph;
1331 struct iphdr *iph;
1332 u32 l4len;
1333 int err;
1334 struct ipv6hdr *ipv6h;
1335
1336 if (!skb_is_gso(skb))
1337 return 0;
1338
1339 if (skb_header_cloned(skb)) {
1340 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1341 if (err)
1342 return err;
1343 }
1344
1345 if (protocol == __constant_htons(ETH_P_IP)) {
1346 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
1347 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
1348 iph->tot_len = 0;
1349 iph->check = 0;
1350 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1351 0, IPPROTO_TCP, 0);
1352 } else if (skb_is_gso_v6(skb)) {
1353
1354 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb)
1355 : ipv6_hdr(skb);
1356 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
1357 ipv6h->payload_len = 0;
1358 tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
1359 0, IPPROTO_TCP, 0);
1360 }
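	/* The checksum seeded above is a pseudo-header checksum computed with a
	 * zero length field; the hardware fills in the final TCP checksum for
	 * each segment it produces.
	 */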
1361
1362 l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
1363 *hdr_len = (skb->encapsulation
1364 ? (skb_inner_transport_header(skb) - skb->data)
1365 : skb_transport_offset(skb)) + l4len;
1366
1367 /* find the field values */
1368 cd_cmd = I40E_TX_CTX_DESC_TSO;
1369 cd_tso_len = skb->len - *hdr_len;
1370 cd_mss = skb_shinfo(skb)->gso_size;
1371 *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT)
1372 | ((u64)cd_tso_len
1373 << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT)
1374 | ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
1375 return 1;
1376}
1377
1378/**
1379 * i40e_tx_enable_csum - Enable Tx checksum offloads
1380 * @skb: send buffer
1381 * @tx_flags: Tx flags currently set
1382 * @td_cmd: Tx descriptor command bits to set
1383 * @td_offset: Tx descriptor header offsets to set
1384 * @cd_tunneling: ptr to context desc bits
1385 **/
1386static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
1387 u32 *td_cmd, u32 *td_offset,
1388 struct i40e_ring *tx_ring,
1389 u32 *cd_tunneling)
1390{
1391 struct ipv6hdr *this_ipv6_hdr;
1392 unsigned int this_tcp_hdrlen;
1393 struct iphdr *this_ip_hdr;
1394 u32 network_hdr_len;
1395 u8 l4_hdr = 0;
1396
1397 if (skb->encapsulation) {
1398 network_hdr_len = skb_inner_network_header_len(skb);
1399 this_ip_hdr = inner_ip_hdr(skb);
1400 this_ipv6_hdr = inner_ipv6_hdr(skb);
1401 this_tcp_hdrlen = inner_tcp_hdrlen(skb);
1402
1403 if (tx_flags & I40E_TX_FLAGS_IPV4) {
1404
1405 if (tx_flags & I40E_TX_FLAGS_TSO) {
1406 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
1407 ip_hdr(skb)->check = 0;
1408 } else {
1409 *cd_tunneling |=
1410 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
1411 }
1412 } else if (tx_flags & I40E_TX_FLAGS_IPV6) {
1413 if (tx_flags & I40E_TX_FLAGS_TSO) {
1414 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
1415 ip_hdr(skb)->check = 0;
1416 } else {
1417 *cd_tunneling |=
1418 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
1419 }
1420 }
1421
1422 /* Now set the ctx descriptor fields */
1423 *cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
1424 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
1425 I40E_TXD_CTX_UDP_TUNNELING |
1426 ((skb_inner_network_offset(skb) -
1427 skb_transport_offset(skb)) >> 1) <<
1428 I40E_TXD_CTX_QW0_NATLEN_SHIFT;
1429
1430 } else {
1431 network_hdr_len = skb_network_header_len(skb);
1432 this_ip_hdr = ip_hdr(skb);
1433 this_ipv6_hdr = ipv6_hdr(skb);
1434 this_tcp_hdrlen = tcp_hdrlen(skb);
1435 }
1436
1437 /* Enable IP checksum offloads */
1438 if (tx_flags & I40E_TX_FLAGS_IPV4) {
1439 l4_hdr = this_ip_hdr->protocol;
1440 /* the stack computes the IP header already, the only time we
1441 * need the hardware to recompute it is in the case of TSO.
1442 */
1443 if (tx_flags & I40E_TX_FLAGS_TSO) {
1444 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
1445 this_ip_hdr->check = 0;
1446 } else {
1447 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
1448 }
1449 /* Now set the td_offset for IP header length */
1450 *td_offset = (network_hdr_len >> 2) <<
1451 I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
1452 } else if (tx_flags & I40E_TX_FLAGS_IPV6) {
1453 l4_hdr = this_ipv6_hdr->nexthdr;
1454 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
1455 /* Now set the td_offset for IP header length */
1456 *td_offset = (network_hdr_len >> 2) <<
1457 I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
1458 }
1459 /* words in MACLEN + dwords in IPLEN + dwords in L4Len */
1460 *td_offset |= (skb_network_offset(skb) >> 1) <<
1461 I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
1462
1463 /* Enable L4 checksum offloads */
1464 switch (l4_hdr) {
1465 case IPPROTO_TCP:
1466 /* enable checksum offloads */
1467 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
1468 *td_offset |= (this_tcp_hdrlen >> 2) <<
1469 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1470 break;
1471 case IPPROTO_SCTP:
1472 /* enable SCTP checksum offload */
1473 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
1474 *td_offset |= (sizeof(struct sctphdr) >> 2) <<
1475 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1476 break;
1477 case IPPROTO_UDP:
1478 /* enable UDP checksum offload */
1479 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
1480 *td_offset |= (sizeof(struct udphdr) >> 2) <<
1481 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1482 break;
1483 default:
1484 break;
1485 }
1486}
1487
1488/**
 1489 * i40e_create_tx_ctx - Build the Tx context descriptor
1490 * @tx_ring: ring to create the descriptor on
1491 * @cd_type_cmd_tso_mss: Quad Word 1
1492 * @cd_tunneling: Quad Word 0 - bits 0-31
1493 * @cd_l2tag2: Quad Word 0 - bits 32-63
1494 **/
1495static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
1496 const u64 cd_type_cmd_tso_mss,
1497 const u32 cd_tunneling, const u32 cd_l2tag2)
1498{
1499 struct i40e_tx_context_desc *context_desc;
 1500 int i = tx_ring->next_to_use;
1501
1502 if (!cd_type_cmd_tso_mss && !cd_tunneling && !cd_l2tag2)
1503 return;
1504
1505 /* grab the next descriptor */
1506 context_desc = I40E_TX_CTXTDESC(tx_ring, i);
1507
1508 i++;
1509 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1510
1511 /* cpu_to_le32 and assign to struct fields */
1512 context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
1513 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
1514 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
1515}
1516
1517/**
1518 * i40e_tx_map - Build the Tx descriptor
1519 * @tx_ring: ring to send buffer on
1520 * @skb: send buffer
1521 * @first: first buffer info buffer to use
1522 * @tx_flags: collected send information
1523 * @hdr_len: size of the packet header
1524 * @td_cmd: the command field in the descriptor
1525 * @td_offset: offset for checksum or crc
1526 **/
1527static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
1528 struct i40e_tx_buffer *first, u32 tx_flags,
1529 const u8 hdr_len, u32 td_cmd, u32 td_offset)
1530{
1531 unsigned int data_len = skb->data_len;
1532 unsigned int size = skb_headlen(skb);
 1533 struct skb_frag_struct *frag;
1534 struct i40e_tx_buffer *tx_bi;
1535 struct i40e_tx_desc *tx_desc;
 1536 u16 i = tx_ring->next_to_use;
1537 u32 td_tag = 0;
1538 dma_addr_t dma;
1539 u16 gso_segs;
1540
1541 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
1542 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
1543 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
1544 I40E_TX_FLAGS_VLAN_SHIFT;
1545 }
1546
1547 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
1548 gso_segs = skb_shinfo(skb)->gso_segs;
1549 else
1550 gso_segs = 1;
1551
1552 /* multiply data chunks by size of headers */
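	/* For TSO the header is replicated in every segment on the wire, so
	 * bytecount charges hdr_len once per segment; this is the value later
	 * reported to BQL and the byte statistics.
	 */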
1553 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
1554 first->gso_segs = gso_segs;
1555 first->skb = skb;
1556 first->tx_flags = tx_flags;
1557
1558 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
1559
 1560 tx_desc = I40E_TX_DESC(tx_ring, i);
1561 tx_bi = first;
1562
1563 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
1564 if (dma_mapping_error(tx_ring->dev, dma))
1565 goto dma_error;
1566
1567 /* record length, and DMA address */
1568 dma_unmap_len_set(tx_bi, len, size);
1569 dma_unmap_addr_set(tx_bi, dma, dma);
1570
1571 tx_desc->buffer_addr = cpu_to_le64(dma);
1572
1573 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
1574 tx_desc->cmd_type_offset_bsz =
1575 build_ctob(td_cmd, td_offset,
1576 I40E_MAX_DATA_PER_TXD, td_tag);
1577
1578 tx_desc++;
1579 i++;
1580 if (i == tx_ring->count) {
1581 tx_desc = I40E_TX_DESC(tx_ring, 0);
1582 i = 0;
1583 }
 1584
1585 dma += I40E_MAX_DATA_PER_TXD;
1586 size -= I40E_MAX_DATA_PER_TXD;
 1587
1588 tx_desc->buffer_addr = cpu_to_le64(dma);
1589 }
1590
1591 if (likely(!data_len))
1592 break;
1593
1594 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
1595 size, td_tag);
1596
1597 tx_desc++;
1598 i++;
1599 if (i == tx_ring->count) {
1600 tx_desc = I40E_TX_DESC(tx_ring, 0);
1601 i = 0;
1602 }
1603
1604 size = skb_frag_size(frag);
1605 data_len -= size;
 1606
1607 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
1608 DMA_TO_DEVICE);
 1609
1610 tx_bi = &tx_ring->tx_bi[i];
1611 }
 1612
1613 tx_desc->cmd_type_offset_bsz =
1614 build_ctob(td_cmd, td_offset, size, td_tag) |
1615 cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
 1616
1617 netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
1618 tx_ring->queue_index),
1619 first->bytecount);
1620
 1621 /* set the timestamp */
 1622 first->time_stamp = jiffies;
1623
1624 /* Force memory writes to complete before letting h/w
1625 * know there are new descriptors to fetch. (Only
1626 * applicable for weak-ordered memory model archs,
1627 * such as IA-64).
1628 */
1629 wmb();
1630
1631 /* set next_to_watch value indicating a packet is present */
1632 first->next_to_watch = tx_desc;
1633
1634 i++;
1635 if (i == tx_ring->count)
1636 i = 0;
1637
1638 tx_ring->next_to_use = i;
1639
1640 /* notify HW of packet */
 1641 writel(i, tx_ring->tail);
 1642
1643 return;
1644
1645dma_error:
 1646 dev_info(tx_ring->dev, "TX DMA map failed\n");
1647
1648 /* clear dma mappings for failed tx_bi map */
1649 for (;;) {
1650 tx_bi = &tx_ring->tx_bi[i];
 1651 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
1652 if (tx_bi == first)
1653 break;
1654 if (i == 0)
1655 i = tx_ring->count;
1656 i--;
1657 }
1658
1659 tx_ring->next_to_use = i;
1660}
1661
1662/**
1663 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
1664 * @tx_ring: the ring to be checked
 1665 * @size: the number of descriptors we want to assure is available
1666 *
1667 * Returns -EBUSY if a stop is needed, else 0
1668 **/
1669static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
1670{
1671 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
1672 smp_mb();
1673
1674 /* Check again in a case another CPU has just made room available. */
1675 if (likely(I40E_DESC_UNUSED(tx_ring) < size))
1676 return -EBUSY;
1677
1678 /* A reprieve! - use start_queue because it doesn't call schedule */
1679 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
1680 ++tx_ring->tx_stats.restart_queue;
1681 return 0;
1682}
1683
1684/**
1685 * i40e_maybe_stop_tx - 1st level check for tx stop conditions
1686 * @tx_ring: the ring to be checked
 1687 * @size: the number of descriptors we want to assure is available
1688 *
1689 * Returns 0 if stop is not needed
1690 **/
1691static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
1692{
1693 if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
1694 return 0;
1695 return __i40e_maybe_stop_tx(tx_ring, size);
1696}
1697
1698/**
1699 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
1700 * @skb: send buffer
1701 * @tx_ring: ring to send buffer on
1702 *
 1703 * Returns the number of data descriptors needed for this skb, or 0 when there
 1704 * are not enough descriptors available in this ring; 0 is never a valid count,
 1705 * since at least one descriptor is always needed.
1706 **/
1707static int i40e_xmit_descriptor_count(struct sk_buff *skb,
1708 struct i40e_ring *tx_ring)
1709{
1710#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
1711 unsigned int f;
1712#endif
1713 int count = 0;
1714
1715 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
1716 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
1717 * + 2 desc gap to keep tail from touching head,
1718 * + 1 desc for context descriptor,
1719 * otherwise try next time
1720 */
1721#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
1722 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
1723 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
1724#else
1725 count += skb_shinfo(skb)->nr_frags;
1726#endif
1727 count += TXD_USE_COUNT(skb_headlen(skb));
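	/* The extra 3 below covers the worst case listed above: one context
	 * descriptor plus the two-descriptor gap that keeps the tail from
	 * touching the head.
	 */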
1728 if (i40e_maybe_stop_tx(tx_ring, count + 3)) {
1729 tx_ring->tx_stats.tx_busy++;
1730 return 0;
1731 }
1732 return count;
1733}
1734
1735/**
1736 * i40e_xmit_frame_ring - Sends buffer on Tx ring
1737 * @skb: send buffer
1738 * @tx_ring: ring to send buffer on
1739 *
1740 * Returns NETDEV_TX_OK if sent, else an error code
1741 **/
1742static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
1743 struct i40e_ring *tx_ring)
1744{
1745 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
1746 u32 cd_tunneling = 0, cd_l2tag2 = 0;
1747 struct i40e_tx_buffer *first;
1748 u32 td_offset = 0;
1749 u32 tx_flags = 0;
1750 __be16 protocol;
1751 u32 td_cmd = 0;
1752 u8 hdr_len = 0;
1753 int tso;
1754 if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
1755 return NETDEV_TX_BUSY;
1756
1757 /* prepare the xmit flags */
1758 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
1759 goto out_drop;
1760
1761 /* obtain protocol of skb */
1762 protocol = skb->protocol;
1763
1764 /* record the location of the first descriptor for this packet */
1765 first = &tx_ring->tx_bi[tx_ring->next_to_use];
1766
1767 /* setup IPv4/IPv6 offloads */
1768 if (protocol == __constant_htons(ETH_P_IP))
1769 tx_flags |= I40E_TX_FLAGS_IPV4;
1770 else if (protocol == __constant_htons(ETH_P_IPV6))
1771 tx_flags |= I40E_TX_FLAGS_IPV6;
1772
1773 tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len,
1774 &cd_type_cmd_tso_mss, &cd_tunneling);
1775
1776 if (tso < 0)
1777 goto out_drop;
1778 else if (tso)
1779 tx_flags |= I40E_TX_FLAGS_TSO;
1780
1781 skb_tx_timestamp(skb);
1782
1783 /* always enable CRC insertion offload */
1784 td_cmd |= I40E_TX_DESC_CMD_ICRC;
1785
 1786 /* Always offload the checksum, since it's in the data descriptor */
 1787 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1788 tx_flags |= I40E_TX_FLAGS_CSUM;
1789
1790 i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
1791 tx_ring, &cd_tunneling);
 1792 }
1793
1794 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
1795 cd_tunneling, cd_l2tag2);
1796
1797 /* Add Flow Director ATR if it's enabled.
1798 *
1799 * NOTE: this must always be directly before the data descriptor.
1800 */
1801 i40e_atr(tx_ring, skb, tx_flags, protocol);
1802
1803 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
1804 td_cmd, td_offset);
1805
1806 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
1807
1808 return NETDEV_TX_OK;
1809
1810out_drop:
1811 dev_kfree_skb_any(skb);
1812 return NETDEV_TX_OK;
1813}
1814
1815/**
1816 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
1817 * @skb: send buffer
1818 * @netdev: network interface device structure
1819 *
1820 * Returns NETDEV_TX_OK if sent, else an error code
1821 **/
1822netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1823{
1824 struct i40e_netdev_priv *np = netdev_priv(netdev);
1825 struct i40e_vsi *vsi = np->vsi;
 1826 struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
1827
1828 /* hardware can't handle really short frames, hardware padding works
1829 * beyond this point
1830 */
1831 if (unlikely(skb->len < I40E_MIN_TX_LEN)) {
1832 if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len))
1833 return NETDEV_TX_OK;
1834 skb->len = I40E_MIN_TX_LEN;
1835 skb_set_tail_pointer(skb, I40E_MIN_TX_LEN);
1836 }
1837
1838 return i40e_xmit_frame_ring(skb, tx_ring);
1839}