]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - drivers/net/ethernet/intel/i40e/i40e_txrx.c
i40e/i40evf: Move stack var deeper
[mirror_ubuntu-artful-kernel.git] / drivers / net / ethernet / intel / i40e / i40e_txrx.c
CommitLineData
fd0a05ce
JB
1/*******************************************************************************
2 *
3 * Intel Ethernet Controller XL710 Family Linux Driver
ecc6a239 4 * Copyright(c) 2013 - 2016 Intel Corporation.
fd0a05ce
JB
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
dc641b73
GR
15 * You should have received a copy of the GNU General Public License along
16 * with this program. If not, see <http://www.gnu.org/licenses/>.
fd0a05ce
JB
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 * Contact Information:
22 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24 *
25 ******************************************************************************/
26
1c112a64 27#include <linux/prefetch.h>
a132af24 28#include <net/busy_poll.h>
fd0a05ce 29#include "i40e.h"
206812b5 30#include "i40e_prototype.h"
fd0a05ce
JB
31
32static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
33 u32 td_tag)
34{
35 return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36 ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
37 ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38 ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
39 ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
40}
41
eaefbd06 42#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
49d7d933 43#define I40E_FD_CLEAN_DELAY 10
fd0a05ce
JB
44/**
45 * i40e_program_fdir_filter - Program a Flow Director filter
17a73f6b
JG
46 * @fdir_data: Packet data that will be filter parameters
47 * @raw_packet: the pre-allocated packet buffer for FDir
b40c82e6 48 * @pf: The PF pointer
fd0a05ce
JB
49 * @add: True for add/update, False for remove
50 **/
17a73f6b 51int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
fd0a05ce
JB
52 struct i40e_pf *pf, bool add)
53{
54 struct i40e_filter_program_desc *fdir_desc;
49d7d933 55 struct i40e_tx_buffer *tx_buf, *first;
fd0a05ce
JB
56 struct i40e_tx_desc *tx_desc;
57 struct i40e_ring *tx_ring;
eaefbd06 58 unsigned int fpt, dcc;
fd0a05ce
JB
59 struct i40e_vsi *vsi;
60 struct device *dev;
61 dma_addr_t dma;
62 u32 td_cmd = 0;
49d7d933 63 u16 delay = 0;
fd0a05ce
JB
64 u16 i;
65
66 /* find existing FDIR VSI */
67 vsi = NULL;
505682cd 68 for (i = 0; i < pf->num_alloc_vsi; i++)
fd0a05ce
JB
69 if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
70 vsi = pf->vsi[i];
71 if (!vsi)
72 return -ENOENT;
73
9f65e15b 74 tx_ring = vsi->tx_rings[0];
fd0a05ce
JB
75 dev = tx_ring->dev;
76
49d7d933
ASJ
77 /* we need two descriptors to add/del a filter and we can wait */
78 do {
79 if (I40E_DESC_UNUSED(tx_ring) > 1)
80 break;
81 msleep_interruptible(1);
82 delay++;
83 } while (delay < I40E_FD_CLEAN_DELAY);
84
85 if (!(I40E_DESC_UNUSED(tx_ring) > 1))
86 return -EAGAIN;
87
17a73f6b
JG
88 dma = dma_map_single(dev, raw_packet,
89 I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
fd0a05ce
JB
90 if (dma_mapping_error(dev, dma))
91 goto dma_fail;
92
93 /* grab the next descriptor */
fc4ac67b
AD
94 i = tx_ring->next_to_use;
95 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
49d7d933
ASJ
96 first = &tx_ring->tx_bi[i];
97 memset(first, 0, sizeof(struct i40e_tx_buffer));
fc4ac67b 98
49d7d933 99 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
fd0a05ce 100
eaefbd06
JB
101 fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102 I40E_TXD_FLTR_QW0_QINDEX_MASK;
fd0a05ce 103
eaefbd06
JB
104 fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105 I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
fd0a05ce 106
eaefbd06
JB
107 fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108 I40E_TXD_FLTR_QW0_PCTYPE_MASK;
fd0a05ce
JB
109
110 /* Use LAN VSI Id if not programmed by user */
111 if (fdir_data->dest_vsi == 0)
eaefbd06
JB
112 fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
fd0a05ce 114 else
eaefbd06
JB
115 fpt |= ((u32)fdir_data->dest_vsi <<
116 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117 I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
118
eaefbd06 119 dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
fd0a05ce
JB
120
121 if (add)
eaefbd06
JB
122 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
fd0a05ce 124 else
eaefbd06
JB
125 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
fd0a05ce 127
eaefbd06
JB
128 dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129 I40E_TXD_FLTR_QW1_DEST_MASK;
fd0a05ce 130
eaefbd06
JB
131 dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132 I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
fd0a05ce
JB
133
134 if (fdir_data->cnt_index != 0) {
eaefbd06
JB
135 dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136 dcc |= ((u32)fdir_data->cnt_index <<
137 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
433c47de 138 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
fd0a05ce
JB
139 }
140
99753ea6
JB
141 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142 fdir_desc->rsvd = cpu_to_le32(0);
eaefbd06 143 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
fd0a05ce
JB
144 fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
145
146 /* Now program a dummy descriptor */
fc4ac67b
AD
147 i = tx_ring->next_to_use;
148 tx_desc = I40E_TX_DESC(tx_ring, i);
298deef1 149 tx_buf = &tx_ring->tx_bi[i];
fc4ac67b 150
49d7d933
ASJ
151 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
152
153 memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
fd0a05ce 154
298deef1 155 /* record length, and DMA address */
17a73f6b 156 dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
298deef1
ASJ
157 dma_unmap_addr_set(tx_buf, dma, dma);
158
fd0a05ce 159 tx_desc->buffer_addr = cpu_to_le64(dma);
eaefbd06 160 td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
fd0a05ce 161
49d7d933
ASJ
162 tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163 tx_buf->raw_buf = (void *)raw_packet;
164
fd0a05ce 165 tx_desc->cmd_type_offset_bsz =
17a73f6b 166 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
fd0a05ce 167
fd0a05ce 168 /* Force memory writes to complete before letting h/w
49d7d933 169 * know there are new descriptors to fetch.
fd0a05ce
JB
170 */
171 wmb();
172
fc4ac67b 173 /* Mark the data descriptor to be watched */
49d7d933 174 first->next_to_watch = tx_desc;
fc4ac67b 175
fd0a05ce
JB
176 writel(tx_ring->next_to_use, tx_ring->tail);
177 return 0;
178
179dma_fail:
180 return -1;
181}
182
17a73f6b
JG
183#define IP_HEADER_OFFSET 14
184#define I40E_UDPIP_DUMMY_PACKET_LEN 42
185/**
186 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187 * @vsi: pointer to the targeted VSI
188 * @fd_data: the flow director data required for the FDir descriptor
17a73f6b
JG
189 * @add: true adds a filter, false removes it
190 *
191 * Returns 0 if the filters were successfully added or removed
192 **/
193static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194 struct i40e_fdir_filter *fd_data,
49d7d933 195 bool add)
17a73f6b
JG
196{
197 struct i40e_pf *pf = vsi->back;
198 struct udphdr *udp;
199 struct iphdr *ip;
200 bool err = false;
49d7d933 201 u8 *raw_packet;
17a73f6b 202 int ret;
17a73f6b
JG
203 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
206
49d7d933
ASJ
207 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
208 if (!raw_packet)
209 return -ENOMEM;
17a73f6b
JG
210 memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
211
212 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213 udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214 + sizeof(struct iphdr));
215
216 ip->daddr = fd_data->dst_ip[0];
217 udp->dest = fd_data->dst_port;
218 ip->saddr = fd_data->src_ip[0];
219 udp->source = fd_data->src_port;
220
b2d36c03
KS
221 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
223 if (ret) {
224 dev_info(&pf->pdev->dev,
e99bdd39
CW
225 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226 fd_data->pctype, fd_data->fd_id, ret);
b2d36c03 227 err = true;
4205d379 228 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
f7233c54
ASJ
229 if (add)
230 dev_info(&pf->pdev->dev,
231 "Filter OK for PCTYPE %d loc = %d\n",
232 fd_data->pctype, fd_data->fd_id);
233 else
234 dev_info(&pf->pdev->dev,
235 "Filter deleted for PCTYPE %d loc = %d\n",
236 fd_data->pctype, fd_data->fd_id);
17a73f6b 237 }
a42e7a36
KP
238 if (err)
239 kfree(raw_packet);
240
17a73f6b
JG
241 return err ? -EOPNOTSUPP : 0;
242}
243
244#define I40E_TCPIP_DUMMY_PACKET_LEN 54
245/**
246 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
247 * @vsi: pointer to the targeted VSI
248 * @fd_data: the flow director data required for the FDir descriptor
17a73f6b
JG
249 * @add: true adds a filter, false removes it
250 *
251 * Returns 0 if the filters were successfully added or removed
252 **/
253static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
254 struct i40e_fdir_filter *fd_data,
49d7d933 255 bool add)
17a73f6b
JG
256{
257 struct i40e_pf *pf = vsi->back;
258 struct tcphdr *tcp;
259 struct iphdr *ip;
260 bool err = false;
49d7d933 261 u8 *raw_packet;
17a73f6b
JG
262 int ret;
263 /* Dummy packet */
264 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
265 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
266 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
267 0x0, 0x72, 0, 0, 0, 0};
268
49d7d933
ASJ
269 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
270 if (!raw_packet)
271 return -ENOMEM;
17a73f6b
JG
272 memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
273
274 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
275 tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
276 + sizeof(struct iphdr));
277
278 ip->daddr = fd_data->dst_ip[0];
279 tcp->dest = fd_data->dst_port;
280 ip->saddr = fd_data->src_ip[0];
281 tcp->source = fd_data->src_port;
282
283 if (add) {
1e1be8f6 284 pf->fd_tcp_rule++;
17a73f6b 285 if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
2e4875e3
ASJ
286 if (I40E_DEBUG_FD & pf->hw.debug_mask)
287 dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
17a73f6b
JG
288 pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
289 }
1e1be8f6
ASJ
290 } else {
291 pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
292 (pf->fd_tcp_rule - 1) : 0;
293 if (pf->fd_tcp_rule == 0) {
294 pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
2e4875e3
ASJ
295 if (I40E_DEBUG_FD & pf->hw.debug_mask)
296 dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
1e1be8f6 297 }
17a73f6b
JG
298 }
299
b2d36c03 300 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
17a73f6b
JG
301 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
302
303 if (ret) {
304 dev_info(&pf->pdev->dev,
e99bdd39
CW
305 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
306 fd_data->pctype, fd_data->fd_id, ret);
17a73f6b 307 err = true;
4205d379 308 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
f7233c54
ASJ
309 if (add)
310 dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n",
311 fd_data->pctype, fd_data->fd_id);
312 else
313 dev_info(&pf->pdev->dev,
314 "Filter deleted for PCTYPE %d loc = %d\n",
315 fd_data->pctype, fd_data->fd_id);
17a73f6b
JG
316 }
317
a42e7a36
KP
318 if (err)
319 kfree(raw_packet);
320
17a73f6b
JG
321 return err ? -EOPNOTSUPP : 0;
322}
323
324/**
325 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
326 * a specific flow spec
327 * @vsi: pointer to the targeted VSI
328 * @fd_data: the flow director data required for the FDir descriptor
17a73f6b
JG
329 * @add: true adds a filter, false removes it
330 *
4eeb1fff 331 * Returns 0 if the filters were successfully added or removed
17a73f6b
JG
332 **/
333static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
334 struct i40e_fdir_filter *fd_data,
49d7d933 335 bool add)
17a73f6b
JG
336{
337 return -EOPNOTSUPP;
338}
339
340#define I40E_IP_DUMMY_PACKET_LEN 34
341/**
342 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
343 * a specific flow spec
344 * @vsi: pointer to the targeted VSI
345 * @fd_data: the flow director data required for the FDir descriptor
17a73f6b
JG
346 * @add: true adds a filter, false removes it
347 *
348 * Returns 0 if the filters were successfully added or removed
349 **/
350static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
351 struct i40e_fdir_filter *fd_data,
49d7d933 352 bool add)
17a73f6b
JG
353{
354 struct i40e_pf *pf = vsi->back;
355 struct iphdr *ip;
356 bool err = false;
49d7d933 357 u8 *raw_packet;
17a73f6b
JG
358 int ret;
359 int i;
360 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
361 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0};
363
17a73f6b
JG
364 for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
365 i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
49d7d933
ASJ
366 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
367 if (!raw_packet)
368 return -ENOMEM;
369 memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
370 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
371
372 ip->saddr = fd_data->src_ip[0];
373 ip->daddr = fd_data->dst_ip[0];
374 ip->protocol = 0;
375
17a73f6b
JG
376 fd_data->pctype = i;
377 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
378
379 if (ret) {
380 dev_info(&pf->pdev->dev,
e99bdd39
CW
381 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
382 fd_data->pctype, fd_data->fd_id, ret);
17a73f6b 383 err = true;
4205d379 384 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
f7233c54
ASJ
385 if (add)
386 dev_info(&pf->pdev->dev,
387 "Filter OK for PCTYPE %d loc = %d\n",
388 fd_data->pctype, fd_data->fd_id);
389 else
390 dev_info(&pf->pdev->dev,
391 "Filter deleted for PCTYPE %d loc = %d\n",
392 fd_data->pctype, fd_data->fd_id);
17a73f6b
JG
393 }
394 }
395
a42e7a36
KP
396 if (err)
397 kfree(raw_packet);
398
17a73f6b
JG
399 return err ? -EOPNOTSUPP : 0;
400}
401
402/**
403 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
404 * @vsi: pointer to the targeted VSI
405 * @cmd: command to get or set RX flow classification rules
406 * @add: true adds a filter, false removes it
407 *
408 **/
409int i40e_add_del_fdir(struct i40e_vsi *vsi,
410 struct i40e_fdir_filter *input, bool add)
411{
412 struct i40e_pf *pf = vsi->back;
17a73f6b
JG
413 int ret;
414
17a73f6b
JG
415 switch (input->flow_type & ~FLOW_EXT) {
416 case TCP_V4_FLOW:
49d7d933 417 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
17a73f6b
JG
418 break;
419 case UDP_V4_FLOW:
49d7d933 420 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
17a73f6b
JG
421 break;
422 case SCTP_V4_FLOW:
49d7d933 423 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
17a73f6b
JG
424 break;
425 case IPV4_FLOW:
49d7d933 426 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
17a73f6b
JG
427 break;
428 case IP_USER_FLOW:
429 switch (input->ip4_proto) {
430 case IPPROTO_TCP:
49d7d933 431 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
17a73f6b
JG
432 break;
433 case IPPROTO_UDP:
49d7d933 434 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
17a73f6b
JG
435 break;
436 case IPPROTO_SCTP:
49d7d933 437 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
17a73f6b
JG
438 break;
439 default:
49d7d933 440 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
17a73f6b
JG
441 break;
442 }
443 break;
444 default:
c5ffe7e1 445 dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
17a73f6b
JG
446 input->flow_type);
447 ret = -EINVAL;
448 }
449
49d7d933 450 /* The buffer allocated here is freed by the i40e_clean_tx_ring() */
17a73f6b
JG
451 return ret;
452}
453
fd0a05ce
JB
454/**
455 * i40e_fd_handle_status - check the Programming Status for FD
456 * @rx_ring: the Rx ring for this descriptor
55a5e60b 457 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
fd0a05ce
JB
458 * @prog_id: the id originally used for programming
459 *
460 * This is used to verify if the FD programming or invalidation
461 * requested by SW to the HW is successful or not and take actions accordingly.
462 **/
55a5e60b
ASJ
463static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
464 union i40e_rx_desc *rx_desc, u8 prog_id)
fd0a05ce 465{
55a5e60b
ASJ
466 struct i40e_pf *pf = rx_ring->vsi->back;
467 struct pci_dev *pdev = pf->pdev;
468 u32 fcnt_prog, fcnt_avail;
fd0a05ce 469 u32 error;
55a5e60b 470 u64 qw;
fd0a05ce 471
55a5e60b 472 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
fd0a05ce
JB
473 error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
474 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
475
41a1d04b 476 if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
3487b6c3 477 pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
f7233c54
ASJ
478 if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
479 (I40E_DEBUG_FD & pf->hw.debug_mask))
480 dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
3487b6c3 481 pf->fd_inv);
55a5e60b 482
04294e38
ASJ
483 /* Check if the programming error is for ATR.
484 * If so, auto disable ATR and set a state for
485 * flush in progress. Next time we come here if flush is in
486 * progress do nothing, once flush is complete the state will
487 * be cleared.
488 */
489 if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
490 return;
491
1e1be8f6
ASJ
492 pf->fd_add_err++;
493 /* store the current atr filter count */
494 pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
495
04294e38
ASJ
496 if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
497 (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
498 pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
499 set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
500 }
501
55a5e60b 502 /* filter programming failed most likely due to table full */
04294e38 503 fcnt_prog = i40e_get_global_fd_count(pf);
12957388 504 fcnt_avail = pf->fdir_pf_filter_count;
55a5e60b
ASJ
505 /* If ATR is running fcnt_prog can quickly change,
506 * if we are very close to full, it makes sense to disable
507 * FD ATR/SB and then re-enable it when there is room.
508 */
509 if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
1e1be8f6 510 if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
b814ba65 511 !(pf->auto_disable_flags &
b814ba65 512 I40E_FLAG_FD_SB_ENABLED)) {
2e4875e3
ASJ
513 if (I40E_DEBUG_FD & pf->hw.debug_mask)
514 dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
55a5e60b
ASJ
515 pf->auto_disable_flags |=
516 I40E_FLAG_FD_SB_ENABLED;
55a5e60b 517 }
55a5e60b 518 }
41a1d04b 519 } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
13c2884f 520 if (I40E_DEBUG_FD & pf->hw.debug_mask)
e99bdd39 521 dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
13c2884f 522 rx_desc->wb.qword0.hi_dword.fd_id);
55a5e60b 523 }
fd0a05ce
JB
524}
525
526/**
a5e9c572 527 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
fd0a05ce
JB
528 * @ring: the ring that owns the buffer
529 * @tx_buffer: the buffer to free
530 **/
a5e9c572
AD
531static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
532 struct i40e_tx_buffer *tx_buffer)
fd0a05ce 533{
a5e9c572 534 if (tx_buffer->skb) {
a42e7a36 535 dev_kfree_skb_any(tx_buffer->skb);
a5e9c572 536 if (dma_unmap_len(tx_buffer, len))
fd0a05ce 537 dma_unmap_single(ring->dev,
35a1e2ad
AD
538 dma_unmap_addr(tx_buffer, dma),
539 dma_unmap_len(tx_buffer, len),
fd0a05ce 540 DMA_TO_DEVICE);
a5e9c572
AD
541 } else if (dma_unmap_len(tx_buffer, len)) {
542 dma_unmap_page(ring->dev,
543 dma_unmap_addr(tx_buffer, dma),
544 dma_unmap_len(tx_buffer, len),
545 DMA_TO_DEVICE);
fd0a05ce 546 }
a42e7a36
KP
547
548 if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
549 kfree(tx_buffer->raw_buf);
550
a5e9c572
AD
551 tx_buffer->next_to_watch = NULL;
552 tx_buffer->skb = NULL;
35a1e2ad 553 dma_unmap_len_set(tx_buffer, len, 0);
a5e9c572 554 /* tx_buffer must be completely set up in the transmit path */
fd0a05ce
JB
555}
556
557/**
558 * i40e_clean_tx_ring - Free any empty Tx buffers
559 * @tx_ring: ring to be cleaned
560 **/
561void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
562{
fd0a05ce
JB
563 unsigned long bi_size;
564 u16 i;
565
566 /* ring already cleared, nothing to do */
567 if (!tx_ring->tx_bi)
568 return;
569
570 /* Free all the Tx ring sk_buffs */
a5e9c572
AD
571 for (i = 0; i < tx_ring->count; i++)
572 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
fd0a05ce
JB
573
574 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
575 memset(tx_ring->tx_bi, 0, bi_size);
576
577 /* Zero out the descriptor ring */
578 memset(tx_ring->desc, 0, tx_ring->size);
579
580 tx_ring->next_to_use = 0;
581 tx_ring->next_to_clean = 0;
7070ce0a
AD
582
583 if (!tx_ring->netdev)
584 return;
585
586 /* cleanup Tx queue statistics */
587 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
588 tx_ring->queue_index));
fd0a05ce
JB
589}
590
591/**
592 * i40e_free_tx_resources - Free Tx resources per queue
593 * @tx_ring: Tx descriptor ring for a specific queue
594 *
595 * Free all transmit software resources
596 **/
597void i40e_free_tx_resources(struct i40e_ring *tx_ring)
598{
599 i40e_clean_tx_ring(tx_ring);
600 kfree(tx_ring->tx_bi);
601 tx_ring->tx_bi = NULL;
602
603 if (tx_ring->desc) {
604 dma_free_coherent(tx_ring->dev, tx_ring->size,
605 tx_ring->desc, tx_ring->dma);
606 tx_ring->desc = NULL;
607 }
608}
609
610/**
611 * i40e_get_tx_pending - how many tx descriptors not processed
612 * @tx_ring: the ring of descriptors
dd353109 613 * @in_sw: is tx_pending being checked in SW or HW
fd0a05ce
JB
614 *
615 * Since there is no access to the ring head register
616 * in XL710, we need to use our local copies
617 **/
dd353109 618u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
fd0a05ce 619{
a68de58d
JB
620 u32 head, tail;
621
dd353109
ASJ
622 if (!in_sw)
623 head = i40e_get_head(ring);
624 else
625 head = ring->next_to_clean;
a68de58d
JB
626 tail = readl(ring->tail);
627
628 if (head != tail)
629 return (head < tail) ?
630 tail - head : (tail + ring->count - head);
631
632 return 0;
fd0a05ce
JB
633}
634
d91649f5
JB
635#define WB_STRIDE 0x3
636
fd0a05ce
JB
637/**
638 * i40e_clean_tx_irq - Reclaim resources after transmit completes
a619afe8
AD
639 * @vsi: the VSI we care about
640 * @tx_ring: Tx ring to clean
641 * @napi_budget: Used to determine if we are in netpoll
fd0a05ce
JB
642 *
643 * Returns true if there's any budget left (e.g. the clean is finished)
644 **/
a619afe8
AD
645static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
646 struct i40e_ring *tx_ring, int napi_budget)
fd0a05ce
JB
647{
648 u16 i = tx_ring->next_to_clean;
649 struct i40e_tx_buffer *tx_buf;
1943d8ba 650 struct i40e_tx_desc *tx_head;
fd0a05ce 651 struct i40e_tx_desc *tx_desc;
a619afe8
AD
652 unsigned int total_bytes = 0, total_packets = 0;
653 unsigned int budget = vsi->work_limit;
fd0a05ce
JB
654
655 tx_buf = &tx_ring->tx_bi[i];
656 tx_desc = I40E_TX_DESC(tx_ring, i);
a5e9c572 657 i -= tx_ring->count;
fd0a05ce 658
1943d8ba
JB
659 tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
660
a5e9c572
AD
661 do {
662 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
fd0a05ce
JB
663
664 /* if next_to_watch is not set then there is no work pending */
665 if (!eop_desc)
666 break;
667
a5e9c572
AD
668 /* prevent any other reads prior to eop_desc */
669 read_barrier_depends();
670
1943d8ba
JB
671 /* we have caught up to head, no work left to do */
672 if (tx_head == tx_desc)
fd0a05ce
JB
673 break;
674
c304fdac 675 /* clear next_to_watch to prevent false hangs */
fd0a05ce 676 tx_buf->next_to_watch = NULL;
fd0a05ce 677
a5e9c572
AD
678 /* update the statistics for this packet */
679 total_bytes += tx_buf->bytecount;
680 total_packets += tx_buf->gso_segs;
fd0a05ce 681
a5e9c572 682 /* free the skb */
a619afe8 683 napi_consume_skb(tx_buf->skb, napi_budget);
fd0a05ce 684
a5e9c572
AD
685 /* unmap skb header data */
686 dma_unmap_single(tx_ring->dev,
687 dma_unmap_addr(tx_buf, dma),
688 dma_unmap_len(tx_buf, len),
689 DMA_TO_DEVICE);
fd0a05ce 690
a5e9c572
AD
691 /* clear tx_buffer data */
692 tx_buf->skb = NULL;
693 dma_unmap_len_set(tx_buf, len, 0);
fd0a05ce 694
a5e9c572
AD
695 /* unmap remaining buffers */
696 while (tx_desc != eop_desc) {
fd0a05ce
JB
697
698 tx_buf++;
699 tx_desc++;
700 i++;
a5e9c572
AD
701 if (unlikely(!i)) {
702 i -= tx_ring->count;
fd0a05ce
JB
703 tx_buf = tx_ring->tx_bi;
704 tx_desc = I40E_TX_DESC(tx_ring, 0);
705 }
fd0a05ce 706
a5e9c572
AD
707 /* unmap any remaining paged data */
708 if (dma_unmap_len(tx_buf, len)) {
709 dma_unmap_page(tx_ring->dev,
710 dma_unmap_addr(tx_buf, dma),
711 dma_unmap_len(tx_buf, len),
712 DMA_TO_DEVICE);
713 dma_unmap_len_set(tx_buf, len, 0);
714 }
715 }
716
717 /* move us one more past the eop_desc for start of next pkt */
718 tx_buf++;
719 tx_desc++;
720 i++;
721 if (unlikely(!i)) {
722 i -= tx_ring->count;
723 tx_buf = tx_ring->tx_bi;
724 tx_desc = I40E_TX_DESC(tx_ring, 0);
725 }
726
016890b9
JB
727 prefetch(tx_desc);
728
a5e9c572
AD
729 /* update budget accounting */
730 budget--;
731 } while (likely(budget));
732
733 i += tx_ring->count;
fd0a05ce 734 tx_ring->next_to_clean = i;
980e9b11 735 u64_stats_update_begin(&tx_ring->syncp);
a114d0a6
AD
736 tx_ring->stats.bytes += total_bytes;
737 tx_ring->stats.packets += total_packets;
980e9b11 738 u64_stats_update_end(&tx_ring->syncp);
fd0a05ce
JB
739 tx_ring->q_vector->tx.total_bytes += total_bytes;
740 tx_ring->q_vector->tx.total_packets += total_packets;
a5e9c572 741
58044743
AS
742 if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
743 unsigned int j = 0;
744
745 /* check to see if there are < 4 descriptors
746 * waiting to be written back, then kick the hardware to force
747 * them to be written back in case we stay in NAPI.
748 * In this mode on X722 we do not enable Interrupt.
749 */
dd353109 750 j = i40e_get_tx_pending(tx_ring, false);
58044743
AS
751
752 if (budget &&
753 ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
a619afe8 754 !test_bit(__I40E_DOWN, &vsi->state) &&
58044743
AS
755 (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
756 tx_ring->arm_wb = true;
757 }
d91649f5 758
7070ce0a
AD
759 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
760 tx_ring->queue_index),
761 total_packets, total_bytes);
762
fd0a05ce
JB
763#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
764 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
765 (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
766 /* Make sure that anybody stopping the queue after this
767 * sees the new next_to_clean.
768 */
769 smp_mb();
770 if (__netif_subqueue_stopped(tx_ring->netdev,
771 tx_ring->queue_index) &&
a619afe8 772 !test_bit(__I40E_DOWN, &vsi->state)) {
fd0a05ce
JB
773 netif_wake_subqueue(tx_ring->netdev,
774 tx_ring->queue_index);
775 ++tx_ring->tx_stats.restart_queue;
776 }
777 }
778
d91649f5
JB
779 return !!budget;
780}
781
782/**
ecc6a239 783 * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
d91649f5 784 * @vsi: the VSI we care about
ecc6a239 785 * @q_vector: the vector on which to enable writeback
d91649f5
JB
786 *
787 **/
ecc6a239
ASJ
788static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
789 struct i40e_q_vector *q_vector)
d91649f5 790{
8e0764b4 791 u16 flags = q_vector->tx.ring[0].flags;
ecc6a239 792 u32 val;
8e0764b4 793
ecc6a239
ASJ
794 if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
795 return;
8e0764b4 796
ecc6a239
ASJ
797 if (q_vector->arm_wb_state)
798 return;
8e0764b4 799
ecc6a239
ASJ
800 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
801 val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
802 I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
a3d772a3 803
ecc6a239
ASJ
804 wr32(&vsi->back->hw,
805 I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
806 val);
807 } else {
808 val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
809 I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
a3d772a3 810
ecc6a239
ASJ
811 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
812 }
813 q_vector->arm_wb_state = true;
814}
815
816/**
817 * i40e_force_wb - Issue SW Interrupt so HW does a wb
818 * @vsi: the VSI we care about
819 * @q_vector: the vector on which to force writeback
820 *
821 **/
822void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
823{
824 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
8e0764b4
ASJ
825 u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
826 I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
827 I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
828 I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
829 /* allow 00 to be written to the index */
830
831 wr32(&vsi->back->hw,
832 I40E_PFINT_DYN_CTLN(q_vector->v_idx +
833 vsi->base_vector - 1), val);
834 } else {
835 u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
836 I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
837 I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
838 I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
839 /* allow 00 to be written to the index */
840
841 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
842 }
fd0a05ce
JB
843}
844
845/**
846 * i40e_set_new_dynamic_itr - Find new ITR level
847 * @rc: structure containing ring performance data
848 *
8f5e39ce
JB
849 * Returns true if ITR changed, false if not
850 *
fd0a05ce
JB
851 * Stores a new ITR value based on packets and byte counts during
852 * the last interrupt. The advantage of per interrupt computation
853 * is faster updates and more accurate ITR for the current traffic
854 * pattern. Constants in this function were computed based on
855 * theoretical maximum wire speed and thresholds were set based on
856 * testing data as well as attempting to minimize response time
857 * while increasing bulk throughput.
858 **/
8f5e39ce 859static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
fd0a05ce
JB
860{
861 enum i40e_latency_range new_latency_range = rc->latency_range;
c56625d5 862 struct i40e_q_vector *qv = rc->ring->q_vector;
fd0a05ce
JB
863 u32 new_itr = rc->itr;
864 int bytes_per_int;
51cc6d9f 865 int usecs;
fd0a05ce
JB
866
867 if (rc->total_packets == 0 || !rc->itr)
8f5e39ce 868 return false;
fd0a05ce
JB
869
870 /* simple throttlerate management
c56625d5 871 * 0-10MB/s lowest (50000 ints/s)
fd0a05ce 872 * 10-20MB/s low (20000 ints/s)
c56625d5
JB
873 * 20-1249MB/s bulk (18000 ints/s)
874 * > 40000 Rx packets per second (8000 ints/s)
51cc6d9f
JB
875 *
876 * The math works out because the divisor is in 10^(-6) which
877 * turns the bytes/us input value into MB/s values, but
878 * make sure to use usecs, as the register values written
ee2319cf
JB
879 * are in 2 usec increments in the ITR registers, and make sure
880 * to use the smoothed values that the countdown timer gives us.
fd0a05ce 881 */
ee2319cf 882 usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
51cc6d9f 883 bytes_per_int = rc->total_bytes / usecs;
ee2319cf 884
de32e3ef 885 switch (new_latency_range) {
fd0a05ce
JB
886 case I40E_LOWEST_LATENCY:
887 if (bytes_per_int > 10)
888 new_latency_range = I40E_LOW_LATENCY;
889 break;
890 case I40E_LOW_LATENCY:
891 if (bytes_per_int > 20)
892 new_latency_range = I40E_BULK_LATENCY;
893 else if (bytes_per_int <= 10)
894 new_latency_range = I40E_LOWEST_LATENCY;
895 break;
896 case I40E_BULK_LATENCY:
c56625d5 897 case I40E_ULTRA_LATENCY:
de32e3ef
CW
898 default:
899 if (bytes_per_int <= 20)
900 new_latency_range = I40E_LOW_LATENCY;
fd0a05ce
JB
901 break;
902 }
c56625d5
JB
903
904 /* this is to adjust RX more aggressively when streaming small
905 * packets. The value of 40000 was picked as it is just beyond
906 * what the hardware can receive per second if in low latency
907 * mode.
908 */
909#define RX_ULTRA_PACKET_RATE 40000
910
911 if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
912 (&qv->rx == rc))
913 new_latency_range = I40E_ULTRA_LATENCY;
914
de32e3ef 915 rc->latency_range = new_latency_range;
fd0a05ce
JB
916
917 switch (new_latency_range) {
918 case I40E_LOWEST_LATENCY:
c56625d5 919 new_itr = I40E_ITR_50K;
fd0a05ce
JB
920 break;
921 case I40E_LOW_LATENCY:
922 new_itr = I40E_ITR_20K;
923 break;
924 case I40E_BULK_LATENCY:
c56625d5
JB
925 new_itr = I40E_ITR_18K;
926 break;
927 case I40E_ULTRA_LATENCY:
fd0a05ce
JB
928 new_itr = I40E_ITR_8K;
929 break;
930 default:
931 break;
932 }
933
fd0a05ce
JB
934 rc->total_bytes = 0;
935 rc->total_packets = 0;
8f5e39ce
JB
936
937 if (new_itr != rc->itr) {
938 rc->itr = new_itr;
939 return true;
940 }
941
942 return false;
fd0a05ce
JB
943}
944
fd0a05ce
JB
945/**
946 * i40e_clean_programming_status - clean the programming status descriptor
947 * @rx_ring: the rx ring that has this descriptor
948 * @rx_desc: the rx descriptor written back by HW
949 *
950 * Flow director should handle FD_FILTER_STATUS to check its filter programming
951 * status being successful or not and take actions accordingly. FCoE should
952 * handle its context/filter programming/invalidation status and take actions.
953 *
954 **/
955static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
956 union i40e_rx_desc *rx_desc)
957{
958 u64 qw;
959 u8 id;
960
961 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
962 id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
963 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
964
965 if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
55a5e60b 966 i40e_fd_handle_status(rx_ring, rx_desc, id);
38e00438
VD
967#ifdef I40E_FCOE
968 else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
969 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
970 i40e_fcoe_handle_status(rx_ring, rx_desc, id);
971#endif
fd0a05ce
JB
972}
973
974/**
975 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
976 * @tx_ring: the tx ring to set up
977 *
978 * Return 0 on success, negative on error
979 **/
980int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
981{
982 struct device *dev = tx_ring->dev;
983 int bi_size;
984
985 if (!dev)
986 return -ENOMEM;
987
e908f815
JB
988 /* warn if we are about to overwrite the pointer */
989 WARN_ON(tx_ring->tx_bi);
fd0a05ce
JB
990 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
991 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
992 if (!tx_ring->tx_bi)
993 goto err;
994
995 /* round up to nearest 4K */
996 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
1943d8ba
JB
997 /* add u32 for head writeback, align after this takes care of
998 * guaranteeing this is at least one cache line in size
999 */
1000 tx_ring->size += sizeof(u32);
fd0a05ce
JB
1001 tx_ring->size = ALIGN(tx_ring->size, 4096);
1002 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1003 &tx_ring->dma, GFP_KERNEL);
1004 if (!tx_ring->desc) {
1005 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1006 tx_ring->size);
1007 goto err;
1008 }
1009
1010 tx_ring->next_to_use = 0;
1011 tx_ring->next_to_clean = 0;
1012 return 0;
1013
1014err:
1015 kfree(tx_ring->tx_bi);
1016 tx_ring->tx_bi = NULL;
1017 return -ENOMEM;
1018}
1019
1020/**
1021 * i40e_clean_rx_ring - Free Rx buffers
1022 * @rx_ring: ring to be cleaned
1023 **/
1024void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1025{
1026 struct device *dev = rx_ring->dev;
1027 struct i40e_rx_buffer *rx_bi;
1028 unsigned long bi_size;
1029 u16 i;
1030
1031 /* ring already cleared, nothing to do */
1032 if (!rx_ring->rx_bi)
1033 return;
1034
a132af24
MW
1035 if (ring_is_ps_enabled(rx_ring)) {
1036 int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
1037
1038 rx_bi = &rx_ring->rx_bi[0];
1039 if (rx_bi->hdr_buf) {
1040 dma_free_coherent(dev,
1041 bufsz,
1042 rx_bi->hdr_buf,
1043 rx_bi->dma);
1044 for (i = 0; i < rx_ring->count; i++) {
1045 rx_bi = &rx_ring->rx_bi[i];
1046 rx_bi->dma = 0;
37a2973a 1047 rx_bi->hdr_buf = NULL;
a132af24
MW
1048 }
1049 }
1050 }
fd0a05ce
JB
1051 /* Free all the Rx ring sk_buffs */
1052 for (i = 0; i < rx_ring->count; i++) {
1053 rx_bi = &rx_ring->rx_bi[i];
1054 if (rx_bi->dma) {
1055 dma_unmap_single(dev,
1056 rx_bi->dma,
1057 rx_ring->rx_buf_len,
1058 DMA_FROM_DEVICE);
1059 rx_bi->dma = 0;
1060 }
1061 if (rx_bi->skb) {
1062 dev_kfree_skb(rx_bi->skb);
1063 rx_bi->skb = NULL;
1064 }
1065 if (rx_bi->page) {
1066 if (rx_bi->page_dma) {
1067 dma_unmap_page(dev,
1068 rx_bi->page_dma,
f16704e5 1069 PAGE_SIZE,
fd0a05ce
JB
1070 DMA_FROM_DEVICE);
1071 rx_bi->page_dma = 0;
1072 }
1073 __free_page(rx_bi->page);
1074 rx_bi->page = NULL;
1075 rx_bi->page_offset = 0;
1076 }
1077 }
1078
1079 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1080 memset(rx_ring->rx_bi, 0, bi_size);
1081
1082 /* Zero out the descriptor ring */
1083 memset(rx_ring->desc, 0, rx_ring->size);
1084
1085 rx_ring->next_to_clean = 0;
1086 rx_ring->next_to_use = 0;
1087}
1088
1089/**
1090 * i40e_free_rx_resources - Free Rx resources
1091 * @rx_ring: ring to clean the resources from
1092 *
1093 * Free all receive software resources
1094 **/
1095void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1096{
1097 i40e_clean_rx_ring(rx_ring);
1098 kfree(rx_ring->rx_bi);
1099 rx_ring->rx_bi = NULL;
1100
1101 if (rx_ring->desc) {
1102 dma_free_coherent(rx_ring->dev, rx_ring->size,
1103 rx_ring->desc, rx_ring->dma);
1104 rx_ring->desc = NULL;
1105 }
1106}
1107
a132af24
MW
1108/**
1109 * i40e_alloc_rx_headers - allocate rx header buffers
1110 * @rx_ring: ring to alloc buffers
1111 *
1112 * Allocate rx header buffers for the entire ring. As these are static,
1113 * this is only called when setting up a new ring.
1114 **/
1115void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1116{
1117 struct device *dev = rx_ring->dev;
1118 struct i40e_rx_buffer *rx_bi;
1119 dma_addr_t dma;
1120 void *buffer;
1121 int buf_size;
1122 int i;
1123
1124 if (rx_ring->rx_bi[0].hdr_buf)
1125 return;
1126 /* Make sure the buffers don't cross cache line boundaries. */
1127 buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1128 buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1129 &dma, GFP_KERNEL);
1130 if (!buffer)
1131 return;
1132 for (i = 0; i < rx_ring->count; i++) {
1133 rx_bi = &rx_ring->rx_bi[i];
1134 rx_bi->dma = dma + (i * buf_size);
1135 rx_bi->hdr_buf = buffer + (i * buf_size);
1136 }
1137}
1138
fd0a05ce
JB
1139/**
1140 * i40e_setup_rx_descriptors - Allocate Rx descriptors
1141 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1142 *
1143 * Returns 0 on success, negative on failure
1144 **/
1145int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1146{
1147 struct device *dev = rx_ring->dev;
1148 int bi_size;
1149
e908f815
JB
1150 /* warn if we are about to overwrite the pointer */
1151 WARN_ON(rx_ring->rx_bi);
fd0a05ce
JB
1152 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1153 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1154 if (!rx_ring->rx_bi)
1155 goto err;
1156
f217d6ca 1157 u64_stats_init(&rx_ring->syncp);
638702bd 1158
fd0a05ce
JB
1159 /* Round up to nearest 4K */
1160 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1161 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1162 : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1163 rx_ring->size = ALIGN(rx_ring->size, 4096);
1164 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1165 &rx_ring->dma, GFP_KERNEL);
1166
1167 if (!rx_ring->desc) {
1168 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1169 rx_ring->size);
1170 goto err;
1171 }
1172
1173 rx_ring->next_to_clean = 0;
1174 rx_ring->next_to_use = 0;
1175
1176 return 0;
1177err:
1178 kfree(rx_ring->rx_bi);
1179 rx_ring->rx_bi = NULL;
1180 return -ENOMEM;
1181}
1182
1183/**
1184 * i40e_release_rx_desc - Store the new tail and head values
1185 * @rx_ring: ring to bump
1186 * @val: new head index
1187 **/
1188static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1189{
1190 rx_ring->next_to_use = val;
1191 /* Force memory writes to complete before letting h/w
1192 * know there are new descriptors to fetch. (Only
1193 * applicable for weak-ordered memory model archs,
1194 * such as IA-64).
1195 */
1196 wmb();
1197 writel(val, rx_ring->tail);
1198}
1199
1200/**
a132af24 1201 * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
fd0a05ce
JB
1202 * @rx_ring: ring to place buffers on
1203 * @cleaned_count: number of buffers to replace
c2e245ab
JB
1204 *
1205 * Returns true if any errors on allocation
fd0a05ce 1206 **/
c2e245ab 1207bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
a132af24
MW
1208{
1209 u16 i = rx_ring->next_to_use;
1210 union i40e_rx_desc *rx_desc;
1211 struct i40e_rx_buffer *bi;
f16704e5 1212 const int current_node = numa_node_id();
a132af24
MW
1213
1214 /* do nothing if no valid netdev defined */
1215 if (!rx_ring->netdev || !cleaned_count)
c2e245ab 1216 return false;
a132af24
MW
1217
1218 while (cleaned_count--) {
1219 rx_desc = I40E_RX_DESC(rx_ring, i);
1220 bi = &rx_ring->rx_bi[i];
1221
1222 if (bi->skb) /* desc is in use */
1223 goto no_buffers;
f16704e5
MW
1224
1225 /* If we've been moved to a different NUMA node, release the
1226 * page so we can get a new one on the current node.
1227 */
1228 if (bi->page && page_to_nid(bi->page) != current_node) {
1229 dma_unmap_page(rx_ring->dev,
1230 bi->page_dma,
1231 PAGE_SIZE,
1232 DMA_FROM_DEVICE);
1233 __free_page(bi->page);
1234 bi->page = NULL;
1235 bi->page_dma = 0;
1236 rx_ring->rx_stats.realloc_count++;
1237 } else if (bi->page) {
1238 rx_ring->rx_stats.page_reuse_count++;
1239 }
1240
a132af24
MW
1241 if (!bi->page) {
1242 bi->page = alloc_page(GFP_ATOMIC);
1243 if (!bi->page) {
1244 rx_ring->rx_stats.alloc_page_failed++;
1245 goto no_buffers;
1246 }
a132af24
MW
1247 bi->page_dma = dma_map_page(rx_ring->dev,
1248 bi->page,
f16704e5
MW
1249 0,
1250 PAGE_SIZE,
a132af24 1251 DMA_FROM_DEVICE);
f16704e5 1252 if (dma_mapping_error(rx_ring->dev, bi->page_dma)) {
a132af24 1253 rx_ring->rx_stats.alloc_page_failed++;
f16704e5
MW
1254 __free_page(bi->page);
1255 bi->page = NULL;
a132af24 1256 bi->page_dma = 0;
f16704e5 1257 bi->page_offset = 0;
a132af24
MW
1258 goto no_buffers;
1259 }
f16704e5 1260 bi->page_offset = 0;
a132af24
MW
1261 }
1262
a132af24
MW
1263 /* Refresh the desc even if buffer_addrs didn't change
1264 * because each write-back erases this info.
1265 */
f16704e5
MW
1266 rx_desc->read.pkt_addr =
1267 cpu_to_le64(bi->page_dma + bi->page_offset);
a132af24
MW
1268 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1269 i++;
1270 if (i == rx_ring->count)
1271 i = 0;
1272 }
1273
c2e245ab
JB
1274 if (rx_ring->next_to_use != i)
1275 i40e_release_rx_desc(rx_ring, i);
1276
1277 return false;
1278
a132af24
MW
1279no_buffers:
1280 if (rx_ring->next_to_use != i)
1281 i40e_release_rx_desc(rx_ring, i);
c2e245ab
JB
1282
1283 /* make sure to come back via polling to try again after
1284 * allocation failure
1285 */
1286 return true;
a132af24
MW
1287}
1288
1289/**
1290 * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1291 * @rx_ring: ring to place buffers on
1292 * @cleaned_count: number of buffers to replace
c2e245ab
JB
1293 *
1294 * Returns true if any errors on allocation
a132af24 1295 **/
c2e245ab 1296bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
fd0a05ce
JB
1297{
1298 u16 i = rx_ring->next_to_use;
1299 union i40e_rx_desc *rx_desc;
1300 struct i40e_rx_buffer *bi;
1301 struct sk_buff *skb;
1302
1303 /* do nothing if no valid netdev defined */
1304 if (!rx_ring->netdev || !cleaned_count)
c2e245ab 1305 return false;
fd0a05ce
JB
1306
1307 while (cleaned_count--) {
1308 rx_desc = I40E_RX_DESC(rx_ring, i);
1309 bi = &rx_ring->rx_bi[i];
1310 skb = bi->skb;
1311
1312 if (!skb) {
dd1a5df8
JB
1313 skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
1314 rx_ring->rx_buf_len,
1315 GFP_ATOMIC |
1316 __GFP_NOWARN);
fd0a05ce 1317 if (!skb) {
420136cc 1318 rx_ring->rx_stats.alloc_buff_failed++;
fd0a05ce
JB
1319 goto no_buffers;
1320 }
1321 /* initialize queue mapping */
1322 skb_record_rx_queue(skb, rx_ring->queue_index);
1323 bi->skb = skb;
1324 }
1325
1326 if (!bi->dma) {
1327 bi->dma = dma_map_single(rx_ring->dev,
1328 skb->data,
1329 rx_ring->rx_buf_len,
1330 DMA_FROM_DEVICE);
1331 if (dma_mapping_error(rx_ring->dev, bi->dma)) {
420136cc 1332 rx_ring->rx_stats.alloc_buff_failed++;
fd0a05ce 1333 bi->dma = 0;
c2e245ab
JB
1334 dev_kfree_skb(bi->skb);
1335 bi->skb = NULL;
fd0a05ce
JB
1336 goto no_buffers;
1337 }
1338 }
1339
a132af24
MW
1340 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1341 rx_desc->read.hdr_addr = 0;
fd0a05ce
JB
1342 i++;
1343 if (i == rx_ring->count)
1344 i = 0;
1345 }
1346
c2e245ab
JB
1347 if (rx_ring->next_to_use != i)
1348 i40e_release_rx_desc(rx_ring, i);
1349
1350 return false;
1351
fd0a05ce
JB
1352no_buffers:
1353 if (rx_ring->next_to_use != i)
1354 i40e_release_rx_desc(rx_ring, i);
c2e245ab
JB
1355
1356 /* make sure to come back via polling to try again after
1357 * allocation failure
1358 */
1359 return true;
fd0a05ce
JB
1360}
1361
1362/**
1363 * i40e_receive_skb - Send a completed packet up the stack
1364 * @rx_ring: rx ring in play
1365 * @skb: packet to send up
1366 * @vlan_tag: vlan tag for packet
1367 **/
1368static void i40e_receive_skb(struct i40e_ring *rx_ring,
1369 struct sk_buff *skb, u16 vlan_tag)
1370{
1371 struct i40e_q_vector *q_vector = rx_ring->q_vector;
fd0a05ce
JB
1372
1373 if (vlan_tag & VLAN_VID_MASK)
1374 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1375
8b650359 1376 napi_gro_receive(&q_vector->napi, skb);
fd0a05ce
JB
1377}
1378
1379/**
1380 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1381 * @vsi: the VSI we care about
1382 * @skb: skb currently being received and modified
1383 * @rx_status: status value of last descriptor in packet
1384 * @rx_error: error value of last descriptor in packet
8144f0f7 1385 * @rx_ptype: ptype value of last descriptor in packet
fd0a05ce
JB
1386 **/
1387static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1388 struct sk_buff *skb,
1389 u32 rx_status,
8144f0f7
JG
1390 u32 rx_error,
1391 u16 rx_ptype)
fd0a05ce 1392{
8a3c91cc 1393 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
fad57330 1394 bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel;
8144f0f7 1395
fd0a05ce
JB
1396 skb->ip_summed = CHECKSUM_NONE;
1397
1398 /* Rx csum enabled and ip headers found? */
8a3c91cc
JB
1399 if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1400 return;
1401
1402 /* did the hardware decode the packet and checksum? */
41a1d04b 1403 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
8a3c91cc
JB
1404 return;
1405
1406 /* both known and outer_ip must be set for the below code to work */
1407 if (!(decoded.known && decoded.outer_ip))
fd0a05ce
JB
1408 return;
1409
fad57330
AD
1410 ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
1411 (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
1412 ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
1413 (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);
8a3c91cc
JB
1414
1415 if (ipv4 &&
41a1d04b
JB
1416 (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1417 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
8a3c91cc
JB
1418 goto checksum_fail;
1419
ddf1d0d7 1420 /* likely incorrect csum if alternate IP extension headers found */
8a3c91cc 1421 if (ipv6 &&
41a1d04b 1422 rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
8a3c91cc 1423 /* don't increment checksum err here, non-fatal err */
8ee75a8e
SN
1424 return;
1425
8a3c91cc 1426 /* there was some L4 error, count error and punt packet to the stack */
41a1d04b 1427 if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
8a3c91cc
JB
1428 goto checksum_fail;
1429
1430 /* handle packets that were not able to be checksummed due
1431 * to arrival speed, in this case the stack can compute
1432 * the csum.
1433 */
41a1d04b 1434 if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
fd0a05ce 1435 return;
fd0a05ce 1436
a9c9a81f
AD
1437 /* The hardware supported by this driver does not validate outer
1438 * checksums for tunneled VXLAN or GENEVE frames. I don't agree
1439 * with it but the specification states that you "MAY validate", it
1440 * doesn't make it a hard requirement so if we have validated the
1441 * inner checksum report CHECKSUM_UNNECESSARY.
8a3c91cc 1442 */
8144f0f7 1443
fad57330
AD
1444 ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1445 (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1446 ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1447 (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1448
fd0a05ce 1449 skb->ip_summed = CHECKSUM_UNNECESSARY;
fa4ba69b 1450 skb->csum_level = ipv4_tunnel || ipv6_tunnel;
8a3c91cc
JB
1451
1452 return;
1453
1454checksum_fail:
1455 vsi->back->hw_csum_rx_error++;
fd0a05ce
JB
1456}
1457
1458/**
857942fd 1459 * i40e_ptype_to_htype - get a hash type
206812b5
JB
1460 * @ptype: the ptype value from the descriptor
1461 *
1462 * Returns a hash type to be used by skb_set_hash
1463 **/
857942fd 1464static inline enum pkt_hash_types i40e_ptype_to_htype(u8 ptype)
206812b5
JB
1465{
1466 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1467
1468 if (!decoded.known)
1469 return PKT_HASH_TYPE_NONE;
1470
1471 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1472 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1473 return PKT_HASH_TYPE_L4;
1474 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1475 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1476 return PKT_HASH_TYPE_L3;
1477 else
1478 return PKT_HASH_TYPE_L2;
1479}
1480
857942fd
ASJ
1481/**
1482 * i40e_rx_hash - set the hash value in the skb
1483 * @ring: descriptor ring
1484 * @rx_desc: specific descriptor
1485 **/
1486static inline void i40e_rx_hash(struct i40e_ring *ring,
1487 union i40e_rx_desc *rx_desc,
1488 struct sk_buff *skb,
1489 u8 rx_ptype)
1490{
1491 u32 hash;
1492 const __le64 rss_mask =
1493 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1494 I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1495
1496 if (ring->netdev->features & NETIF_F_RXHASH)
1497 return;
1498
1499 if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
1500 hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1501 skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
1502 }
1503}
1504
fd0a05ce 1505/**
a132af24 1506 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
fd0a05ce
JB
1507 * @rx_ring: rx ring to clean
1508 * @budget: how many cleans we're allowed
1509 *
1510 * Returns true if there's any budget left (e.g. the clean is finished)
1511 **/
c2e245ab 1512static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
fd0a05ce
JB
1513{
1514 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1515 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1516 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
fd0a05ce
JB
1517 struct i40e_vsi *vsi = rx_ring->vsi;
1518 u16 i = rx_ring->next_to_clean;
1519 union i40e_rx_desc *rx_desc;
1520 u32 rx_error, rx_status;
c2e245ab 1521 bool failure = false;
206812b5 1522 u8 rx_ptype;
fd0a05ce 1523 u64 qword;
f16704e5 1524 u32 copysize;
fd0a05ce 1525
390f86df
EB
1526 if (budget <= 0)
1527 return 0;
1528
a132af24 1529 do {
fd0a05ce
JB
1530 struct i40e_rx_buffer *rx_bi;
1531 struct sk_buff *skb;
1532 u16 vlan_tag;
a132af24
MW
1533 /* return some buffers to hardware, one at a time is too slow */
1534 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
c2e245ab
JB
1535 failure = failure ||
1536 i40e_alloc_rx_buffers_ps(rx_ring,
1537 cleaned_count);
a132af24
MW
1538 cleaned_count = 0;
1539 }
1540
1541 i = rx_ring->next_to_clean;
1542 rx_desc = I40E_RX_DESC(rx_ring, i);
1543 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1544 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1545 I40E_RXD_QW1_STATUS_SHIFT;
1546
41a1d04b 1547 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
a132af24
MW
1548 break;
1549
1550 /* This memory barrier is needed to keep us from reading
1551 * any other fields out of the rx_desc until we know the
1552 * DD bit is set.
1553 */
67317166 1554 dma_rmb();
f16704e5
MW
1555 /* sync header buffer for reading */
1556 dma_sync_single_range_for_cpu(rx_ring->dev,
1557 rx_ring->rx_bi[0].dma,
1558 i * rx_ring->rx_hdr_len,
1559 rx_ring->rx_hdr_len,
1560 DMA_FROM_DEVICE);
fd0a05ce
JB
1561 if (i40e_rx_is_programming_status(qword)) {
1562 i40e_clean_programming_status(rx_ring, rx_desc);
a132af24
MW
1563 I40E_RX_INCREMENT(rx_ring, i);
1564 continue;
fd0a05ce
JB
1565 }
1566 rx_bi = &rx_ring->rx_bi[i];
1567 skb = rx_bi->skb;
a132af24 1568 if (likely(!skb)) {
dd1a5df8
JB
1569 skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
1570 rx_ring->rx_hdr_len,
1571 GFP_ATOMIC |
1572 __GFP_NOWARN);
8b6ed9c2 1573 if (!skb) {
a132af24 1574 rx_ring->rx_stats.alloc_buff_failed++;
c2e245ab 1575 failure = true;
8b6ed9c2
JB
1576 break;
1577 }
1578
a132af24
MW
1579 /* initialize queue mapping */
1580 skb_record_rx_queue(skb, rx_ring->queue_index);
1581 /* we are reusing so sync this buffer for CPU use */
1582 dma_sync_single_range_for_cpu(rx_ring->dev,
3578fa0a
JB
1583 rx_ring->rx_bi[0].dma,
1584 i * rx_ring->rx_hdr_len,
a132af24
MW
1585 rx_ring->rx_hdr_len,
1586 DMA_FROM_DEVICE);
1587 }
829af3ac
MW
1588 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1589 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1590 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1591 I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1592 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1593 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1594
1595 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1596 I40E_RXD_QW1_ERROR_SHIFT;
41a1d04b
JB
1597 rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1598 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
fd0a05ce 1599
8144f0f7
JG
1600 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1601 I40E_RXD_QW1_PTYPE_SHIFT;
f16704e5
MW
1602 /* sync half-page for reading */
1603 dma_sync_single_range_for_cpu(rx_ring->dev,
1604 rx_bi->page_dma,
1605 rx_bi->page_offset,
1606 PAGE_SIZE / 2,
1607 DMA_FROM_DEVICE);
1608 prefetch(page_address(rx_bi->page) + rx_bi->page_offset);
fd0a05ce 1609 rx_bi->skb = NULL;
a132af24 1610 cleaned_count++;
f16704e5 1611 copysize = 0;
a132af24
MW
1612 if (rx_hbo || rx_sph) {
1613 int len;
6995b36c 1614
fd0a05ce
JB
1615 if (rx_hbo)
1616 len = I40E_RX_HDR_SIZE;
fd0a05ce 1617 else
a132af24
MW
1618 len = rx_header_len;
1619 memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1620 } else if (skb->len == 0) {
1621 int len;
f16704e5
MW
1622 unsigned char *va = page_address(rx_bi->page) +
1623 rx_bi->page_offset;
a132af24 1624
f16704e5
MW
1625 len = min(rx_packet_len, rx_ring->rx_hdr_len);
1626 memcpy(__skb_put(skb, len), va, len);
1627 copysize = len;
a132af24 1628 rx_packet_len -= len;
fd0a05ce 1629 }
fd0a05ce 1630 /* Get the rest of the data if this was a header split */
a132af24 1631 if (rx_packet_len) {
f16704e5
MW
1632 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
1633 rx_bi->page,
1634 rx_bi->page_offset + copysize,
1635 rx_packet_len, I40E_RXBUFFER_2048);
1636
f16704e5
MW
1637 /* If the page count is more than 2, then both halves
1638 * of the page are used and we need to free it. Do it
1639 * here instead of in the alloc code. Otherwise one
1640 * of the half-pages might be released between now and
1641 * then, and we wouldn't know which one to use.
16fd08b8
MW
1642 * Don't call get_page and free_page since those are
1643 * both expensive atomic operations that just change
1644 * the refcount in opposite directions. Just give the
1645 * page to the stack; he can have our refcount.
f16704e5
MW
1646 */
1647 if (page_count(rx_bi->page) > 2) {
1648 dma_unmap_page(rx_ring->dev,
1649 rx_bi->page_dma,
1650 PAGE_SIZE,
1651 DMA_FROM_DEVICE);
fd0a05ce 1652 rx_bi->page = NULL;
f16704e5
MW
1653 rx_bi->page_dma = 0;
1654 rx_ring->rx_stats.realloc_count++;
16fd08b8
MW
1655 } else {
1656 get_page(rx_bi->page);
1657 /* switch to the other half-page here; the
1658 * allocation code programs the right addr
1659 * into HW. If we haven't used this half-page,
1660 * the address won't be changed, and HW can
1661 * just use it next time through.
1662 */
1663 rx_bi->page_offset ^= PAGE_SIZE / 2;
f16704e5 1664 }
fd0a05ce 1665
fd0a05ce 1666 }
a132af24 1667 I40E_RX_INCREMENT(rx_ring, i);
fd0a05ce
JB
1668
1669 if (unlikely(
41a1d04b 1670 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
fd0a05ce
JB
1671 struct i40e_rx_buffer *next_buffer;
1672
1673 next_buffer = &rx_ring->rx_bi[i];
a132af24 1674 next_buffer->skb = skb;
fd0a05ce 1675 rx_ring->rx_stats.non_eop_descs++;
a132af24 1676 continue;
fd0a05ce
JB
1677 }
1678
1679 /* ERR_MASK will only have valid bits if EOP set */
41a1d04b 1680 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
fd0a05ce 1681 dev_kfree_skb_any(skb);
a132af24 1682 continue;
fd0a05ce
JB
1683 }
1684
857942fd
ASJ
1685 i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
1686
beb0dff1
JK
1687 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1688 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1689 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1690 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1691 rx_ring->last_rx_timestamp = jiffies;
1692 }
1693
fd0a05ce
JB
1694 /* probably a little skewed due to removing CRC */
1695 total_rx_bytes += skb->len;
1696 total_rx_packets++;
1697
1698 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
8144f0f7
JG
1699
1700 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1701
41a1d04b 1702 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
fd0a05ce
JB
1703 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1704 : 0;
38e00438
VD
1705#ifdef I40E_FCOE
1706 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1707 dev_kfree_skb_any(skb);
a132af24 1708 continue;
38e00438
VD
1709 }
1710#endif
fd0a05ce
JB
1711 i40e_receive_skb(rx_ring, skb, vlan_tag);
1712
fd0a05ce 1713 rx_desc->wb.qword1.status_error_len = 0;
fd0a05ce 1714
a132af24
MW
1715 } while (likely(total_rx_packets < budget));
1716
1717 u64_stats_update_begin(&rx_ring->syncp);
1718 rx_ring->stats.packets += total_rx_packets;
1719 rx_ring->stats.bytes += total_rx_bytes;
1720 u64_stats_update_end(&rx_ring->syncp);
1721 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1722 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1723
c2e245ab 1724 return failure ? budget : total_rx_packets;
a132af24
MW
1725}
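
/* Illustrative sketch (not part of the driver): the half-page reuse decision
 * used in i40e_clean_rx_irq_ps() above, restated in isolation.  The
 * example_rx_buf type and helper name are hypothetical simplifications; only
 * the refcount test and the page_offset flip mirror the real logic.
 */
struct example_rx_buf {
        struct page *page;
        dma_addr_t page_dma;
        unsigned int page_offset;
};

static bool example_reuse_half_page(struct device *dev,
                                    struct example_rx_buf *buf)
{
        /* a count > 2 means both half-pages are owned by the stack, so the
         * page cannot be recycled; unmap it and force a reallocation
         */
        if (page_count(buf->page) > 2) {
                dma_unmap_page(dev, buf->page_dma, PAGE_SIZE, DMA_FROM_DEVICE);
                buf->page = NULL;
                buf->page_dma = 0;
                return false;
        }

        /* otherwise keep a reference for ourselves and hand the other
         * half-page to hardware on the next pass
         */
        get_page(buf->page);
        buf->page_offset ^= PAGE_SIZE / 2;
        return true;
}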
1726
1727/**
1728 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1729 * @rx_ring: rx ring to clean
1730 * @budget: how many cleans we're allowed
1731 *
1732 * Returns number of packets cleaned
1733 **/
1734static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1735{
1736 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1737 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1738 struct i40e_vsi *vsi = rx_ring->vsi;
1739 union i40e_rx_desc *rx_desc;
1740 u32 rx_error, rx_status;
1741 u16 rx_packet_len;
c2e245ab 1742 bool failure = false;
a132af24
MW
1743 u8 rx_ptype;
1744 u64 qword;
1745 u16 i;
1746
1747 do {
1748 struct i40e_rx_buffer *rx_bi;
1749 struct sk_buff *skb;
1750 u16 vlan_tag;
fd0a05ce
JB
1751 /* return some buffers to hardware, one at a time is too slow */
1752 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
c2e245ab
JB
1753 failure = failure ||
1754 i40e_alloc_rx_buffers_1buf(rx_ring,
1755 cleaned_count);
fd0a05ce
JB
1756 cleaned_count = 0;
1757 }
1758
a132af24
MW
1759 i = rx_ring->next_to_clean;
1760 rx_desc = I40E_RX_DESC(rx_ring, i);
fd0a05ce 1761 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
829af3ac 1762 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
a132af24
MW
1763 I40E_RXD_QW1_STATUS_SHIFT;
1764
41a1d04b 1765 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
a132af24
MW
1766 break;
1767
1768 /* This memory barrier is needed to keep us from reading
1769 * any other fields out of the rx_desc until we know the
1770 * DD bit is set.
1771 */
67317166 1772 dma_rmb();
a132af24
MW
1773
1774 if (i40e_rx_is_programming_status(qword)) {
1775 i40e_clean_programming_status(rx_ring, rx_desc);
1776 I40E_RX_INCREMENT(rx_ring, i);
1777 continue;
1778 }
1779 rx_bi = &rx_ring->rx_bi[i];
1780 skb = rx_bi->skb;
1781 prefetch(skb->data);
1782
1783 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1784 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1785
1786 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1787 I40E_RXD_QW1_ERROR_SHIFT;
41a1d04b 1788 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
a132af24
MW
1789
1790 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1791 I40E_RXD_QW1_PTYPE_SHIFT;
1792 rx_bi->skb = NULL;
1793 cleaned_count++;
1794
1795 /* Get the header and possibly the whole packet.
1796 * If this is an skb left over from a previous receive, dma will be 0.
1797 */
1798 skb_put(skb, rx_packet_len);
1799 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1800 DMA_FROM_DEVICE);
1801 rx_bi->dma = 0;
1802
1803 I40E_RX_INCREMENT(rx_ring, i);
1804
1805 if (unlikely(
41a1d04b 1806 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
a132af24
MW
1807 rx_ring->rx_stats.non_eop_descs++;
1808 continue;
1809 }
1810
1811 /* ERR_MASK will only have valid bits if EOP set */
41a1d04b 1812 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
a132af24 1813 dev_kfree_skb_any(skb);
a132af24
MW
1814 continue;
1815 }
1816
857942fd 1817 i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
a132af24
MW
1818 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1819 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1820 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1821 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1822 rx_ring->last_rx_timestamp = jiffies;
1823 }
1824
1825 /* probably a little skewed due to removing CRC */
1826 total_rx_bytes += skb->len;
1827 total_rx_packets++;
1828
1829 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1830
1831 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1832
41a1d04b 1833 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
a132af24
MW
1834 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1835 : 0;
1836#ifdef I40E_FCOE
1837 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1838 dev_kfree_skb_any(skb);
1839 continue;
1840 }
1841#endif
1842 i40e_receive_skb(rx_ring, skb, vlan_tag);
1843
a132af24
MW
1844 rx_desc->wb.qword1.status_error_len = 0;
1845 } while (likely(total_rx_packets < budget));
fd0a05ce 1846
980e9b11 1847 u64_stats_update_begin(&rx_ring->syncp);
a114d0a6
AD
1848 rx_ring->stats.packets += total_rx_packets;
1849 rx_ring->stats.bytes += total_rx_bytes;
980e9b11 1850 u64_stats_update_end(&rx_ring->syncp);
fd0a05ce
JB
1851 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1852 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1853
c2e245ab 1854 return failure ? budget : total_rx_packets;
fd0a05ce
JB
1855}
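
/* Illustrative sketch (not part of the driver): the descriptor read ordering
 * that both clean routines above depend on.  Nothing else may be read from a
 * write-back descriptor until the DD bit has been observed; dma_rmb() keeps
 * the CPU/compiler from hoisting the later loads above that check.  The
 * helper name is hypothetical and exists only to isolate the pattern.
 */
static bool example_rx_desc_ready(union i40e_rx_desc *rx_desc, u64 *qword)
{
        u32 rx_status;

        *qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
        rx_status = (*qword & I40E_RXD_QW1_STATUS_MASK) >>
                    I40E_RXD_QW1_STATUS_SHIFT;

        if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
                return false;

        /* order the DD check before any other descriptor field reads */
        dma_rmb();
        return true;
}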
1856
8f5e39ce
JB
1857static u32 i40e_buildreg_itr(const int type, const u16 itr)
1858{
1859 u32 val;
1860
1861 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
40d72a50
JB
1862 /* Don't clear PBA because that can cause lost interrupts that
1863 * came in while we were cleaning/polling
1864 */
8f5e39ce
JB
1865 (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1866 (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1867
1868 return val;
1869}
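
/* Illustrative sketch (not part of the driver): what i40e_buildreg_itr()
 * produces for a couple of inputs.  INTENA is always set, the ITR index
 * selects which ITR the interval applies to, and I40E_ITR_NONE with an
 * interval of 0 leaves the current ITR untouched while still re-enabling the
 * interrupt.  The helper name and the 0x10 interval are example values only.
 */
static void example_buildreg_usage(struct i40e_hw *hw, int vector)
{
        u32 noitr = i40e_buildreg_itr(I40E_ITR_NONE, 0); /* enable only */
        u32 rxitr = i40e_buildreg_itr(I40E_RX_ITR, 0x10); /* enable + Rx ITR */

        wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), noitr);
        (void)rxitr;
}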
1870
1871/* a small macro to shorten up some long lines */
1872#define INTREG I40E_PFINT_DYN_CTLN
1873
de32e3ef
CW
1874/**
1875 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1876 * @vsi: the VSI we care about
1877 * @q_vector: q_vector for which itr is being updated and interrupt enabled
1878 *
1879 **/
1880static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1881 struct i40e_q_vector *q_vector)
1882{
1883 struct i40e_hw *hw = &vsi->back->hw;
8f5e39ce
JB
1884 bool rx = false, tx = false;
1885 u32 rxval, txval;
de32e3ef 1886 int vector;
a75e8005 1887 int idx = q_vector->v_idx;
de32e3ef
CW
1888
1889 vector = (q_vector->v_idx + vsi->base_vector);
8f5e39ce 1890
ee2319cf
JB
1891 /* avoid dynamic ITR calculation if we are in countdown mode OR if
1892 * dynamic ITR is disabled on both the Rx and Tx rings
1893 */
8f5e39ce
JB
1894 rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1895
ee2319cf 1896 if (q_vector->itr_countdown > 0 ||
a75e8005
KL
1897 (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) &&
1898 !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) {
ee2319cf
JB
1899 goto enable_int;
1900 }
1901
a75e8005 1902 if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) {
8f5e39ce
JB
1903 rx = i40e_set_new_dynamic_itr(&q_vector->rx);
1904 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
de32e3ef 1905 }
8f5e39ce 1906
a75e8005 1907 if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) {
8f5e39ce
JB
1908 tx = i40e_set_new_dynamic_itr(&q_vector->tx);
1909 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
de32e3ef 1910 }
8f5e39ce
JB
1911
1912 if (rx || tx) {
1913 /* get the higher of the two ITR adjustments and
1914 * use the same value for both ITR registers
1915 * when in adaptive mode (Rx and/or Tx)
1916 */
1917 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
1918
1919 q_vector->tx.itr = q_vector->rx.itr = itr;
1920 txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
1921 tx = true;
1922 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
1923 rx = true;
1924 }
1925
1926 /* only need to enable the interrupt once, but need
1927 * to possibly update both ITR values
1928 */
1929 if (rx) {
1930 /* set the INTENA_MSK_MASK so that this first write
1931 * won't actually enable the interrupt, instead just
1932 * updating the ITR (bit 31 in both the PF and VF registers)
1933 */
1934 rxval |= BIT(31);
1935 /* don't check _DOWN because interrupt isn't being enabled */
1936 wr32(hw, INTREG(vector - 1), rxval);
1937 }
1938
ee2319cf 1939enable_int:
8f5e39ce
JB
1940 if (!test_bit(__I40E_DOWN, &vsi->state))
1941 wr32(hw, INTREG(vector - 1), txval);
ee2319cf
JB
1942
1943 if (q_vector->itr_countdown)
1944 q_vector->itr_countdown--;
1945 else
1946 q_vector->itr_countdown = ITR_COUNTDOWN_START;
de32e3ef
CW
1947}
1948
fd0a05ce
JB
1949/**
1950 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1951 * @napi: napi struct with our devices info in it
1952 * @budget: amount of work driver is allowed to do this pass, in packets
1953 *
1954 * This function will clean all queues associated with a q_vector.
1955 *
1956 * Returns the amount of work done
1957 **/
1958int i40e_napi_poll(struct napi_struct *napi, int budget)
1959{
1960 struct i40e_q_vector *q_vector =
1961 container_of(napi, struct i40e_q_vector, napi);
1962 struct i40e_vsi *vsi = q_vector->vsi;
cd0b6fa6 1963 struct i40e_ring *ring;
fd0a05ce 1964 bool clean_complete = true;
d91649f5 1965 bool arm_wb = false;
fd0a05ce 1966 int budget_per_ring;
32b3e08f 1967 int work_done = 0;
fd0a05ce
JB
1968
1969 if (test_bit(__I40E_DOWN, &vsi->state)) {
1970 napi_complete(napi);
1971 return 0;
1972 }
1973
9c6c1259
KP
1974 /* Clear hung_detected bit */
1975 clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
cd0b6fa6
AD
1976 /* Since the actual Tx work is minimal, we can give the Tx a larger
1977 * budget and be more aggressive about cleaning up the Tx descriptors.
1978 */
d91649f5 1979 i40e_for_each_ring(ring, q_vector->tx) {
a619afe8 1980 if (!i40e_clean_tx_irq(vsi, ring, budget)) {
f2edaaaa
AD
1981 clean_complete = false;
1982 continue;
1983 }
1984 arm_wb |= ring->arm_wb;
0deda868 1985 ring->arm_wb = false;
d91649f5 1986 }
cd0b6fa6 1987
c67caceb
AD
1988 /* Handle case where we are called by netpoll with a budget of 0 */
1989 if (budget <= 0)
1990 goto tx_only;
1991
fd0a05ce
JB
1992 /* We attempt to distribute budget to each Rx queue fairly, but don't
1993 * allow the budget to go below 1 because that would exit polling early.
fd0a05ce
JB
1994 */
1995 budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
cd0b6fa6 1996
a132af24 1997 i40e_for_each_ring(ring, q_vector->rx) {
32b3e08f
JB
1998 int cleaned;
1999
a132af24
MW
2000 if (ring_is_ps_enabled(ring))
2001 cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
2002 else
2003 cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
32b3e08f
JB
2004
2005 work_done += cleaned;
f2edaaaa
AD
2006 /* if we clean as many as budgeted, we must not be done */
2007 if (cleaned >= budget_per_ring)
2008 clean_complete = false;
a132af24 2009 }
fd0a05ce
JB
2010
2011 /* If work not completed, return budget and polling will return */
d91649f5 2012 if (!clean_complete) {
c67caceb 2013tx_only:
164c9f54
ASJ
2014 if (arm_wb) {
2015 q_vector->tx.ring[0].tx_stats.tx_force_wb++;
ecc6a239 2016 i40e_enable_wb_on_itr(vsi, q_vector);
164c9f54 2017 }
fd0a05ce 2018 return budget;
d91649f5 2019 }
fd0a05ce 2020
8e0764b4
ASJ
2021 if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
2022 q_vector->arm_wb_state = false;
2023
fd0a05ce 2024 /* Work is done so exit the polling mode and re-enable the interrupt */
32b3e08f 2025 napi_complete_done(napi, work_done);
de32e3ef
CW
2026 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
2027 i40e_update_enable_itr(vsi, q_vector);
2028 } else { /* Legacy mode */
40d72a50 2029 i40e_irq_dynamic_enable_icr0(vsi->back, false);
fd0a05ce 2030 }
fd0a05ce
JB
2031 return 0;
2032}
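
/* Illustrative sketch (not part of the driver): how the NAPI budget is split
 * in i40e_napi_poll() above.  With a 64-packet budget and 4 ring pairs each
 * Rx ring may clean at most 16 packets per poll; the max() keeps the
 * per-ring budget from reaching 0, which would end polling prematurely.
 * The helper name is hypothetical.
 */
static int example_budget_per_ring(int budget, int num_ringpairs)
{
        return max(budget / num_ringpairs, 1);
}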
2033
2034/**
2035 * i40e_atr - Add a Flow Director ATR filter
2036 * @tx_ring: ring to add programming descriptor to
2037 * @skb: send buffer
89232c3b 2038 * @tx_flags: send tx flags
fd0a05ce
JB
2039 **/
2040static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
6b037cd4 2041 u32 tx_flags)
fd0a05ce
JB
2042{
2043 struct i40e_filter_program_desc *fdir_desc;
2044 struct i40e_pf *pf = tx_ring->vsi->back;
2045 union {
2046 unsigned char *network;
2047 struct iphdr *ipv4;
2048 struct ipv6hdr *ipv6;
2049 } hdr;
2050 struct tcphdr *th;
2051 unsigned int hlen;
2052 u32 flex_ptype, dtype_cmd;
ffcc55c0 2053 int l4_proto;
fc4ac67b 2054 u16 i;
fd0a05ce
JB
2055
2056 /* make sure ATR is enabled */
60ea5f83 2057 if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
fd0a05ce
JB
2058 return;
2059
04294e38
ASJ
2060 if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2061 return;
2062
fd0a05ce
JB
2063 /* if sampling is disabled do nothing */
2064 if (!tx_ring->atr_sample_rate)
2065 return;
2066
6b037cd4 2067 /* Currently only IPv4/IPv6 with TCP is supported */
89232c3b
ASJ
2068 if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
2069 return;
fd0a05ce 2070
ffcc55c0
AD
2071 /* snag network header to get L4 type and address */
2072 hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ?
2073 skb_inner_network_header(skb) : skb_network_header(skb);
fd0a05ce 2074
ffcc55c0
AD
2075 /* Note: tx_flags gets modified to reflect inner protocols in
2076 * tx_enable_csum function if encap is enabled.
2077 */
2078 if (tx_flags & I40E_TX_FLAGS_IPV4) {
6b037cd4 2079 /* access ihl as u8 to avoid unaligned access on ia64 */
ffcc55c0
AD
2080 hlen = (hdr.network[0] & 0x0F) << 2;
2081 l4_proto = hdr.ipv4->protocol;
fd0a05ce 2082 } else {
ffcc55c0
AD
2083 hlen = hdr.network - skb->data;
2084 l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
2085 hlen -= hdr.network - skb->data;
fd0a05ce
JB
2086 }
2087
6b037cd4 2088 if (l4_proto != IPPROTO_TCP)
89232c3b
ASJ
2089 return;
2090
fd0a05ce
JB
2091 th = (struct tcphdr *)(hdr.network + hlen);
2092
55a5e60b
ASJ
2093 /* Due to lack of space, no more new filters can be programmed */
2094 if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2095 return;
72b74869
ASJ
2096 if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2097 (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
52eb95ef
ASJ
2098 /* HW ATR eviction will take care of removing filters on FIN
2099 * and RST packets.
2100 */
2101 if (th->fin || th->rst)
2102 return;
2103 }
55a5e60b
ASJ
2104
2105 tx_ring->atr_count++;
2106
ce806783
ASJ
2107 /* sample on all syn/fin/rst packets or once every atr sample rate */
2108 if (!th->fin &&
2109 !th->syn &&
2110 !th->rst &&
2111 (tx_ring->atr_count < tx_ring->atr_sample_rate))
fd0a05ce
JB
2112 return;
2113
2114 tx_ring->atr_count = 0;
2115
2116 /* grab the next descriptor */
fc4ac67b
AD
2117 i = tx_ring->next_to_use;
2118 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2119
2120 i++;
2121 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
fd0a05ce
JB
2122
2123 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2124 I40E_TXD_FLTR_QW0_QINDEX_MASK;
6b037cd4 2125 flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ?
fd0a05ce
JB
2126 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2127 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2128 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2129 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2130
2131 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2132
2133 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2134
ce806783 2135 dtype_cmd |= (th->fin || th->rst) ?
fd0a05ce
JB
2136 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2137 I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2138 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2139 I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2140
2141 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2142 I40E_TXD_FLTR_QW1_DEST_SHIFT;
2143
2144 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2145 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2146
433c47de 2147 dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
6a899024 2148 if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
60ccd45c
ASJ
2149 dtype_cmd |=
2150 ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2151 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2152 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2153 else
2154 dtype_cmd |=
2155 ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2156 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2157 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
433c47de 2158
72b74869
ASJ
2159 if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2160 (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
52eb95ef
ASJ
2161 dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2162
fd0a05ce 2163 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
99753ea6 2164 fdir_desc->rsvd = cpu_to_le32(0);
fd0a05ce 2165 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
99753ea6 2166 fdir_desc->fd_id = cpu_to_le32(0);
fd0a05ce
JB
2167}
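
/* Illustrative sketch (not part of the driver): the ATR sampling rule used
 * in i40e_atr() above, in isolation.  SYN/FIN/RST segments are always
 * sampled so filters are added/removed at connection boundaries; everything
 * else is sampled once per atr_sample_rate packets.  The helper name is
 * hypothetical.
 */
static bool example_atr_should_sample(struct i40e_ring *tx_ring,
                                      const struct tcphdr *th)
{
        tx_ring->atr_count++;

        if (!th->syn && !th->fin && !th->rst &&
            tx_ring->atr_count < tx_ring->atr_sample_rate)
                return false;

        tx_ring->atr_count = 0;
        return true;
}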
2168
fd0a05ce
JB
2169/**
2170 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2171 * @skb: send buffer
2172 * @tx_ring: ring to send buffer on
2173 * @flags: the tx flags to be set
2174 *
2175 * Checks the skb and sets up the generic transmit flags related to VLAN
2176 * tagging for the HW, such as VLAN, DCB, etc.
2177 *
2178 * Returns an error code to indicate the frame should be dropped upon error,
2179 * otherwise returns 0 to indicate the flags have been set properly.
2180 **/
38e00438 2181#ifdef I40E_FCOE
3e587cf3 2182inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
fd0a05ce
JB
2183 struct i40e_ring *tx_ring,
2184 u32 *flags)
3e587cf3
JB
2185#else
2186static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2187 struct i40e_ring *tx_ring,
2188 u32 *flags)
38e00438 2189#endif
fd0a05ce
JB
2190{
2191 __be16 protocol = skb->protocol;
2192 u32 tx_flags = 0;
2193
31eaaccf
GR
2194 if (protocol == htons(ETH_P_8021Q) &&
2195 !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2196 /* When HW VLAN acceleration is turned off by the user the
2197 * stack sets the protocol to 8021q so that the driver
2198 * can take any steps required to support the SW only
2199 * VLAN handling. In our case the driver doesn't need
2200 * to take any further steps so just set the protocol
2201 * to the encapsulated ethertype.
2202 */
2203 skb->protocol = vlan_get_protocol(skb);
2204 goto out;
2205 }
2206
fd0a05ce 2207 /* if we have a HW VLAN tag being added, default to the HW one */
df8a39de
JP
2208 if (skb_vlan_tag_present(skb)) {
2209 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
fd0a05ce
JB
2210 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2211 /* else if it is a SW VLAN, check the next protocol and store the tag */
0e2fe46c 2212 } else if (protocol == htons(ETH_P_8021Q)) {
fd0a05ce 2213 struct vlan_hdr *vhdr, _vhdr;
6995b36c 2214
fd0a05ce
JB
2215 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2216 if (!vhdr)
2217 return -EINVAL;
2218
2219 protocol = vhdr->h_vlan_encapsulated_proto;
2220 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2221 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2222 }
2223
d40d00b1
NP
2224 if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2225 goto out;
2226
fd0a05ce 2227 /* Insert 802.1p priority into VLAN header */
38e00438
VD
2228 if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2229 (skb->priority != TC_PRIO_CONTROL)) {
fd0a05ce
JB
2230 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2231 tx_flags |= (skb->priority & 0x7) <<
2232 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2233 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2234 struct vlan_ethhdr *vhdr;
dd225bc6
FR
2235 int rc;
2236
2237 rc = skb_cow_head(skb, 0);
2238 if (rc < 0)
2239 return rc;
fd0a05ce
JB
2240 vhdr = (struct vlan_ethhdr *)skb->data;
2241 vhdr->h_vlan_TCI = htons(tx_flags >>
2242 I40E_TX_FLAGS_VLAN_SHIFT);
2243 } else {
2244 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2245 }
2246 }
d40d00b1
NP
2247
2248out:
fd0a05ce
JB
2249 *flags = tx_flags;
2250 return 0;
2251}
2252
fd0a05ce
JB
2253/**
2254 * i40e_tso - set up the tso context descriptor
2255 * @tx_ring: ptr to the ring to send
2256 * @skb: ptr to the skb we're sending
fd0a05ce 2257 * @hdr_len: ptr to the size of the packet header
9c883bd3 2258 * @cd_type_cmd_tso_mss: Quad Word 1
fd0a05ce
JB
2259 *
2260 * Returns 0 if no TSO can happen, 1 if TSO will be used, or a negative error
2261 **/
2262static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
9c883bd3 2263 u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
fd0a05ce 2264{
03f9d6a5 2265 u64 cd_cmd, cd_tso_len, cd_mss;
c777019a
AD
2266 union {
2267 struct iphdr *v4;
2268 struct ipv6hdr *v6;
2269 unsigned char *hdr;
2270 } ip;
c49a7bc3
AD
2271 union {
2272 struct tcphdr *tcp;
5453205c 2273 struct udphdr *udp;
c49a7bc3
AD
2274 unsigned char *hdr;
2275 } l4;
2276 u32 paylen, l4_offset;
fd0a05ce 2277 int err;
fd0a05ce 2278
e9f6563d
SN
2279 if (skb->ip_summed != CHECKSUM_PARTIAL)
2280 return 0;
2281
fd0a05ce
JB
2282 if (!skb_is_gso(skb))
2283 return 0;
2284
dd225bc6
FR
2285 err = skb_cow_head(skb, 0);
2286 if (err < 0)
2287 return err;
fd0a05ce 2288
c777019a
AD
2289 ip.hdr = skb_network_header(skb);
2290 l4.hdr = skb_transport_header(skb);
df23075f 2291
c777019a
AD
2292 /* initialize outer IP header fields */
2293 if (ip.v4->version == 4) {
2294 ip.v4->tot_len = 0;
2295 ip.v4->check = 0;
c49a7bc3 2296 } else {
c777019a
AD
2297 ip.v6->payload_len = 0;
2298 }
2299
5453205c
AD
2300 if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE |
2301 SKB_GSO_UDP_TUNNEL_CSUM)) {
2302 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
2303 /* determine offset of outer transport header */
2304 l4_offset = l4.hdr - skb->data;
2305
2306 /* remove payload length from outer checksum */
24d41e5e
AD
2307 paylen = skb->len - l4_offset;
2308 csum_replace_by_diff(&l4.udp->check, htonl(paylen));
5453205c
AD
2309 }
2310
c777019a
AD
2311 /* reset pointers to inner headers */
2312 ip.hdr = skb_inner_network_header(skb);
2313 l4.hdr = skb_inner_transport_header(skb);
2314
2315 /* initialize inner IP header fields */
2316 if (ip.v4->version == 4) {
2317 ip.v4->tot_len = 0;
2318 ip.v4->check = 0;
2319 } else {
2320 ip.v6->payload_len = 0;
2321 }
fd0a05ce
JB
2322 }
2323
c49a7bc3
AD
2324 /* determine offset of inner transport header */
2325 l4_offset = l4.hdr - skb->data;
2326
2327 /* remove payload length from inner checksum */
24d41e5e
AD
2328 paylen = skb->len - l4_offset;
2329 csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
c49a7bc3
AD
2330
2331 /* compute length of segmentation header */
2332 *hdr_len = (l4.tcp->doff * 4) + l4_offset;
fd0a05ce
JB
2333
2334 /* find the field values */
2335 cd_cmd = I40E_TX_CTX_DESC_TSO;
2336 cd_tso_len = skb->len - *hdr_len;
2337 cd_mss = skb_shinfo(skb)->gso_size;
03f9d6a5
AD
2338 *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2339 (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2340 (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
fd0a05ce
JB
2341 return 1;
2342}
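
/* Illustrative sketch (not part of the driver): the header-length arithmetic
 * i40e_tso() performs above.  For a plain (non-tunnelled) TCP segment the
 * segmentation header is everything up to and including the TCP header, so
 * hdr_len is the transport offset plus the TCP header length (doff is in
 * 32-bit words); cd_tso_len is then the payload that remains.  The helper
 * name is hypothetical.
 */
static void example_tso_lengths(const struct sk_buff *skb, u8 *hdr_len,
                                u64 *cd_tso_len)
{
        const struct tcphdr *th = tcp_hdr(skb);
        u32 l4_offset = skb_transport_offset(skb);

        *hdr_len = (th->doff * 4) + l4_offset;
        *cd_tso_len = skb->len - *hdr_len;
}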
2343
beb0dff1
JK
2344/**
2345 * i40e_tsyn - set up the tsyn context descriptor
2346 * @tx_ring: ptr to the ring to send
2347 * @skb: ptr to the skb we're sending
2348 * @tx_flags: the collected send information
9c883bd3 2349 * @cd_type_cmd_tso_mss: Quad Word 1
beb0dff1
JK
2350 *
2351 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2352 **/
2353static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2354 u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2355{
2356 struct i40e_pf *pf;
2357
2358 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2359 return 0;
2360
2361 /* Tx timestamps cannot be sampled when doing TSO */
2362 if (tx_flags & I40E_TX_FLAGS_TSO)
2363 return 0;
2364
2365 /* only timestamp the outbound packet if the user has requested it and
2366 * we are not already transmitting a packet to be timestamped
2367 */
2368 pf = i40e_netdev_to_pf(tx_ring->netdev);
22b4777d
JK
2369 if (!(pf->flags & I40E_FLAG_PTP))
2370 return 0;
2371
9ce34f02
JK
2372 if (pf->ptp_tx &&
2373 !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
beb0dff1
JK
2374 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2375 pf->ptp_tx_skb = skb_get(skb);
2376 } else {
2377 return 0;
2378 }
2379
2380 *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2381 I40E_TXD_CTX_QW1_CMD_SHIFT;
2382
beb0dff1
JK
2383 return 1;
2384}
2385
fd0a05ce
JB
2386/**
2387 * i40e_tx_enable_csum - Enable Tx checksum offloads
2388 * @skb: send buffer
89232c3b 2389 * @tx_flags: pointer to Tx flags currently set
fd0a05ce
JB
2390 * @td_cmd: Tx descriptor command bits to set
2391 * @td_offset: Tx descriptor header offsets to set
554f4544 2392 * @tx_ring: Tx descriptor ring
fd0a05ce
JB
2393 * @cd_tunneling: ptr to context desc bits
2394 **/
529f1f65
AD
2395static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2396 u32 *td_cmd, u32 *td_offset,
2397 struct i40e_ring *tx_ring,
2398 u32 *cd_tunneling)
fd0a05ce 2399{
b96b78f2
AD
2400 union {
2401 struct iphdr *v4;
2402 struct ipv6hdr *v6;
2403 unsigned char *hdr;
2404 } ip;
2405 union {
2406 struct tcphdr *tcp;
2407 struct udphdr *udp;
2408 unsigned char *hdr;
2409 } l4;
a3fd9d88 2410 unsigned char *exthdr;
d1bd743b 2411 u32 offset, cmd = 0;
a3fd9d88 2412 __be16 frag_off;
b96b78f2
AD
2413 u8 l4_proto = 0;
2414
529f1f65
AD
2415 if (skb->ip_summed != CHECKSUM_PARTIAL)
2416 return 0;
2417
b96b78f2
AD
2418 ip.hdr = skb_network_header(skb);
2419 l4.hdr = skb_transport_header(skb);
fd0a05ce 2420
475b4205
AD
2421 /* compute outer L2 header size */
2422 offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
2423
fd0a05ce 2424 if (skb->encapsulation) {
d1bd743b 2425 u32 tunnel = 0;
a0064728
AD
2426 /* define outer network header type */
2427 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
475b4205
AD
2428 tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2429 I40E_TX_CTX_EXT_IP_IPV4 :
2430 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2431
a0064728
AD
2432 l4_proto = ip.v4->protocol;
2433 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
475b4205 2434 tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
a3fd9d88
AD
2435
2436 exthdr = ip.hdr + sizeof(*ip.v6);
a0064728 2437 l4_proto = ip.v6->nexthdr;
a3fd9d88
AD
2438 if (l4.hdr != exthdr)
2439 ipv6_skip_exthdr(skb, exthdr - skb->data,
2440 &l4_proto, &frag_off);
a0064728
AD
2441 }
2442
475b4205
AD
2443 /* compute outer L3 header size */
2444 tunnel |= ((l4.hdr - ip.hdr) / 4) <<
2445 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
2446
2447 /* switch IP header pointer from outer to inner header */
2448 ip.hdr = skb_inner_network_header(skb);
2449
a0064728
AD
2450 /* define outer transport */
2451 switch (l4_proto) {
45991204 2452 case IPPROTO_UDP:
475b4205 2453 tunnel |= I40E_TXD_CTX_UDP_TUNNELING;
6a899024 2454 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
45991204 2455 break;
c1d1791d 2456 case IPPROTO_GRE:
475b4205 2457 tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
a0064728 2458 *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
c1d1791d 2459 break;
45991204 2460 default:
529f1f65
AD
2461 if (*tx_flags & I40E_TX_FLAGS_TSO)
2462 return -1;
2463
2464 skb_checksum_help(skb);
2465 return 0;
45991204 2466 }
b96b78f2 2467
475b4205
AD
2468 /* compute tunnel header size */
2469 tunnel |= ((ip.hdr - l4.hdr) / 2) <<
2470 I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2471
5453205c
AD
2472 /* indicate if we need to offload outer UDP header */
2473 if ((*tx_flags & I40E_TX_FLAGS_TSO) &&
2474 (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
2475 tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
2476
475b4205
AD
2477 /* record tunnel offload values */
2478 *cd_tunneling |= tunnel;
2479
b96b78f2 2480 /* switch L4 header pointer from outer to inner */
b96b78f2 2481 l4.hdr = skb_inner_transport_header(skb);
a0064728 2482 l4_proto = 0;
fd0a05ce 2483
a0064728
AD
2484 /* reset type as we transition from outer to inner headers */
2485 *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6);
2486 if (ip.v4->version == 4)
2487 *tx_flags |= I40E_TX_FLAGS_IPV4;
2488 if (ip.v6->version == 6)
89232c3b 2489 *tx_flags |= I40E_TX_FLAGS_IPV6;
fd0a05ce
JB
2490 }
2491
2492 /* Enable IP checksum offloads */
89232c3b 2493 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
b96b78f2 2494 l4_proto = ip.v4->protocol;
fd0a05ce
JB
2495 /* the stack computes the IP header already, the only time we
2496 * need the hardware to recompute it is in the case of TSO.
2497 */
475b4205
AD
2498 cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2499 I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
2500 I40E_TX_DESC_CMD_IIPT_IPV4;
89232c3b 2501 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
475b4205 2502 cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
a3fd9d88
AD
2503
2504 exthdr = ip.hdr + sizeof(*ip.v6);
2505 l4_proto = ip.v6->nexthdr;
2506 if (l4.hdr != exthdr)
2507 ipv6_skip_exthdr(skb, exthdr - skb->data,
2508 &l4_proto, &frag_off);
fd0a05ce 2509 }
b96b78f2 2510
475b4205
AD
2511 /* compute inner L3 header size */
2512 offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
fd0a05ce
JB
2513
2514 /* Enable L4 checksum offloads */
b96b78f2 2515 switch (l4_proto) {
fd0a05ce
JB
2516 case IPPROTO_TCP:
2517 /* enable checksum offloads */
475b4205
AD
2518 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2519 offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
fd0a05ce
JB
2520 break;
2521 case IPPROTO_SCTP:
2522 /* enable SCTP checksum offload */
475b4205
AD
2523 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2524 offset |= (sizeof(struct sctphdr) >> 2) <<
2525 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
fd0a05ce
JB
2526 break;
2527 case IPPROTO_UDP:
2528 /* enable UDP checksum offload */
475b4205
AD
2529 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2530 offset |= (sizeof(struct udphdr) >> 2) <<
2531 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
fd0a05ce
JB
2532 break;
2533 default:
529f1f65
AD
2534 if (*tx_flags & I40E_TX_FLAGS_TSO)
2535 return -1;
2536 skb_checksum_help(skb);
2537 return 0;
fd0a05ce 2538 }
475b4205
AD
2539
2540 *td_cmd |= cmd;
2541 *td_offset |= offset;
529f1f65
AD
2542
2543 return 1;
fd0a05ce
JB
2544}
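
/* Illustrative sketch (not part of the driver): how the offsets computed in
 * i40e_tx_enable_csum() pack into td_offset.  MACLEN is expressed in 2-byte
 * words, IPLEN and L4LEN in 4-byte words, each shifted into its own field.
 * The byte lengths used here (14/20/20) are just example values for an
 * untagged IPv4/TCP frame; the helper name is hypothetical.
 */
static u32 example_td_offset(void)
{
        u32 maclen = 14;        /* Ethernet header, bytes */
        u32 iplen = 20;         /* IPv4 header, bytes */
        u32 l4len = 20;         /* TCP header, bytes */

        return ((maclen / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT) |
               ((iplen / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT) |
               ((l4len / 4) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT);
}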
2545
2546/**
2547 * i40e_create_tx_ctx - Build the Tx context descriptor
2548 * @tx_ring: ring to create the descriptor on
2549 * @cd_type_cmd_tso_mss: Quad Word 1
2550 * @cd_tunneling: Quad Word 0 - bits 0-31
2551 * @cd_l2tag2: Quad Word 0 - bits 32-63
2552 **/
2553static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2554 const u64 cd_type_cmd_tso_mss,
2555 const u32 cd_tunneling, const u32 cd_l2tag2)
2556{
2557 struct i40e_tx_context_desc *context_desc;
fc4ac67b 2558 int i = tx_ring->next_to_use;
fd0a05ce 2559
ff40dd5d
JB
2560 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2561 !cd_tunneling && !cd_l2tag2)
fd0a05ce
JB
2562 return;
2563
2564 /* grab the next descriptor */
fc4ac67b
AD
2565 context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2566
2567 i++;
2568 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
fd0a05ce
JB
2569
2570 /* cpu_to_le32 and assign to struct fields */
2571 context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2572 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
3efbbb20 2573 context_desc->rsvd = cpu_to_le16(0);
fd0a05ce
JB
2574 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2575}
2576
4567dc10
ED
2577/**
2578 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2579 * @tx_ring: the ring to be checked
2580 * @size: the size buffer we want to assure is available
2581 *
2582 * Returns -EBUSY if a stop is needed, else 0
2583 **/
4ec441df 2584int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
4567dc10
ED
2585{
2586 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2587 /* Memory barrier before checking head and tail */
2588 smp_mb();
2589
2590 /* Check again in a case another CPU has just made room available. */
2591 if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2592 return -EBUSY;
2593
2594 /* A reprieve! - use start_queue because it doesn't call schedule */
2595 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2596 ++tx_ring->tx_stats.restart_queue;
2597 return 0;
2598}
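
/* Illustrative sketch (not part of the driver): the calling pattern around
 * __i40e_maybe_stop_tx().  The fast-path check lives in a small inline
 * helper (declared in the header, not shown in this file) that only falls
 * back to the routine above when the ring really looks full; the smp_mb()
 * above then closes the race with the Tx clean path restarting the queue.
 * The helper name here is hypothetical.
 */
static int example_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
{
        if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
                return 0;
        return __i40e_maybe_stop_tx(tx_ring, size);
}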
2599
71da6197 2600/**
2d37490b 2601 * __i40e_chk_linearize - Check if there are more than 8 fragments per packet
71da6197 2602 * @skb: send buffer
71da6197
AS
2603 *
2604 * Note: Our HW can't scatter-gather more than 8 fragments to build
2605 * a packet on the wire and so we need to figure out the cases where we
2606 * need to linearize the skb.
2607 **/
2d37490b 2608bool __i40e_chk_linearize(struct sk_buff *skb)
71da6197 2609{
2d37490b
AD
2610 const struct skb_frag_struct *frag, *stale;
2611 int gso_size, nr_frags, sum;
71da6197 2612
2d37490b
AD
2613 /* check to see if TSO is enabled, if so we may get a reprieve */
2614 gso_size = skb_shinfo(skb)->gso_size;
2615 if (unlikely(!gso_size))
2616 return true;
71da6197 2617
2d37490b
AD
2618 /* no need to check if number of frags is less than 8 */
2619 nr_frags = skb_shinfo(skb)->nr_frags;
2620 if (nr_frags < I40E_MAX_BUFFER_TXD)
2621 return false;
71da6197 2622
2d37490b
AD
2623 /* We need to walk through the list and validate that each group
2624 * of 6 fragments totals at least gso_size. However we don't need
2625 * to perform such validation on the first or last 6 since the first
2626 * 6 cannot inherit any data from a descriptor before them, and the
2627 * last 6 cannot inherit any data from a descriptor after them.
2628 */
2629 nr_frags -= I40E_MAX_BUFFER_TXD - 1;
2630 frag = &skb_shinfo(skb)->frags[0];
2631
2632 * Initialize sum to the negative value of gso_size minus 1. We
2633 * use this as the worst case scenario in which the frag ahead
2634 * of us only provides one byte which is why we are limited to 6
2635 * descriptors for a single transmit as the header and previous
2636 * fragment are already consuming 2 descriptors.
2637 */
2638 sum = 1 - gso_size;
2639
2640 /* Add size of frags 1 through 5 to create our initial sum */
2641 sum += skb_frag_size(++frag);
2642 sum += skb_frag_size(++frag);
2643 sum += skb_frag_size(++frag);
2644 sum += skb_frag_size(++frag);
2645 sum += skb_frag_size(++frag);
2646
2647 /* Walk through fragments adding latest fragment, testing it, and
2648 * then removing stale fragments from the sum.
2649 */
2650 stale = &skb_shinfo(skb)->frags[0];
2651 for (;;) {
2652 sum += skb_frag_size(++frag);
2653
2654 /* if sum is negative we failed to make sufficient progress */
2655 if (sum < 0)
2656 return true;
2657
2658 /* use pre-decrement to avoid processing last fragment */
2659 if (!--nr_frags)
2660 break;
2661
2662 sum -= skb_frag_size(++stale);
71da6197
AS
2663 }
2664
2d37490b 2665 return false;
71da6197
AS
2666}
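
/* Illustrative sketch (not part of the driver): the sliding-window test from
 * __i40e_chk_linearize() run over an array of plain fragment sizes.  Every
 * window of 6 consecutive fragments (after the first) must carry at least
 * gso_size bytes, otherwise a segment could need more than 8 descriptors and
 * the skb has to be linearized.  This helper is hypothetical and only
 * mirrors the arithmetic above with plain ints instead of skb frags.
 */
static bool example_needs_linearize(const int *frag_size, int nr_frags,
                                    int gso_size)
{
        int sum, i, stale = 1;

        if (nr_frags < I40E_MAX_BUFFER_TXD)
                return false;

        /* prime the window with fragments 1 through 5 */
        sum = 1 - gso_size;
        for (i = 1; i < I40E_MAX_BUFFER_TXD - 2; i++)
                sum += frag_size[i];

        /* slide: add the newest fragment, test, then drop the oldest */
        for (i = I40E_MAX_BUFFER_TXD - 2; i < nr_frags - 1; i++) {
                sum += frag_size[i];
                if (sum < 0)
                        return true;
                sum -= frag_size[stale++];
        }

        return false;
}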
2667
fd0a05ce
JB
2668/**
2669 * i40e_tx_map - Build the Tx descriptor
2670 * @tx_ring: ring to send buffer on
2671 * @skb: send buffer
2672 * @first: first buffer info buffer to use
2673 * @tx_flags: collected send information
2674 * @hdr_len: size of the packet header
2675 * @td_cmd: the command field in the descriptor
2676 * @td_offset: offset for checksum or crc
2677 **/
38e00438 2678#ifdef I40E_FCOE
3e587cf3 2679inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
fd0a05ce
JB
2680 struct i40e_tx_buffer *first, u32 tx_flags,
2681 const u8 hdr_len, u32 td_cmd, u32 td_offset)
3e587cf3
JB
2682#else
2683static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2684 struct i40e_tx_buffer *first, u32 tx_flags,
2685 const u8 hdr_len, u32 td_cmd, u32 td_offset)
38e00438 2686#endif
fd0a05ce 2687{
fd0a05ce
JB
2688 unsigned int data_len = skb->data_len;
2689 unsigned int size = skb_headlen(skb);
a5e9c572 2690 struct skb_frag_struct *frag;
fd0a05ce
JB
2691 struct i40e_tx_buffer *tx_bi;
2692 struct i40e_tx_desc *tx_desc;
a5e9c572 2693 u16 i = tx_ring->next_to_use;
fd0a05ce
JB
2694 u32 td_tag = 0;
2695 dma_addr_t dma;
2696 u16 gso_segs;
58044743
AS
2697 u16 desc_count = 0;
2698 bool tail_bump = true;
2699 bool do_rs = false;
fd0a05ce 2700
fd0a05ce
JB
2701 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2702 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2703 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2704 I40E_TX_FLAGS_VLAN_SHIFT;
2705 }
2706
a5e9c572
AD
2707 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2708 gso_segs = skb_shinfo(skb)->gso_segs;
2709 else
2710 gso_segs = 1;
2711
2712 /* multiply data chunks by size of headers */
2713 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2714 first->gso_segs = gso_segs;
2715 first->skb = skb;
2716 first->tx_flags = tx_flags;
2717
2718 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2719
fd0a05ce 2720 tx_desc = I40E_TX_DESC(tx_ring, i);
a5e9c572
AD
2721 tx_bi = first;
2722
2723 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
5c4654da
AD
2724 unsigned int max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
2725
a5e9c572
AD
2726 if (dma_mapping_error(tx_ring->dev, dma))
2727 goto dma_error;
2728
2729 /* record length, and DMA address */
2730 dma_unmap_len_set(tx_bi, len, size);
2731 dma_unmap_addr_set(tx_bi, dma, dma);
2732
5c4654da
AD
2733 /* align size to end of page */
2734 max_data += -dma & (I40E_MAX_READ_REQ_SIZE - 1);
a5e9c572
AD
2735 tx_desc->buffer_addr = cpu_to_le64(dma);
2736
2737 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
fd0a05ce
JB
2738 tx_desc->cmd_type_offset_bsz =
2739 build_ctob(td_cmd, td_offset,
5c4654da 2740 max_data, td_tag);
fd0a05ce 2741
fd0a05ce
JB
2742 tx_desc++;
2743 i++;
58044743
AS
2744 desc_count++;
2745
fd0a05ce
JB
2746 if (i == tx_ring->count) {
2747 tx_desc = I40E_TX_DESC(tx_ring, 0);
2748 i = 0;
2749 }
fd0a05ce 2750
5c4654da
AD
2751 dma += max_data;
2752 size -= max_data;
fd0a05ce 2753
5c4654da 2754 max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
a5e9c572
AD
2755 tx_desc->buffer_addr = cpu_to_le64(dma);
2756 }
fd0a05ce
JB
2757
2758 if (likely(!data_len))
2759 break;
2760
a5e9c572
AD
2761 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2762 size, td_tag);
fd0a05ce
JB
2763
2764 tx_desc++;
2765 i++;
58044743
AS
2766 desc_count++;
2767
fd0a05ce
JB
2768 if (i == tx_ring->count) {
2769 tx_desc = I40E_TX_DESC(tx_ring, 0);
2770 i = 0;
2771 }
2772
a5e9c572
AD
2773 size = skb_frag_size(frag);
2774 data_len -= size;
fd0a05ce 2775
a5e9c572
AD
2776 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2777 DMA_TO_DEVICE);
fd0a05ce 2778
a5e9c572
AD
2779 tx_bi = &tx_ring->tx_bi[i];
2780 }
fd0a05ce 2781
a5e9c572
AD
2782 /* set next_to_watch value indicating a packet is present */
2783 first->next_to_watch = tx_desc;
2784
2785 i++;
2786 if (i == tx_ring->count)
2787 i = 0;
2788
2789 tx_ring->next_to_use = i;
2790
58044743
AS
2791 netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2792 tx_ring->queue_index),
2793 first->bytecount);
4567dc10 2794 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
58044743
AS
2795
2796 /* Algorithm to optimize tail and RS bit setting:
2797 * if xmit_more is supported
2798 * if xmit_more is true
2799 * do not update tail and do not mark RS bit.
2800 * if xmit_more is false and last xmit_more was false
2801 * if every packet spanned less than 4 desc
2802 * then set RS bit on 4th packet and update tail
2803 * on every packet
2804 * else
2805 * update tail and set RS bit on every packet.
2806 * if xmit_more is false and last_xmit_more was true
2807 * update tail and set RS bit.
2808 *
2809 * Optimization: wmb to be issued only in case of tail update.
2810 * Also optimize the Descriptor WB path for RS bit with the same
2811 * algorithm.
2812 *
2813 * Note: If there are less than 4 packets
2814 * pending and interrupts were disabled the service task will
2815 * trigger a force WB.
2816 */
2817 if (skb->xmit_more &&
2818 !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2819 tx_ring->queue_index))) {
2820 tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2821 tail_bump = false;
2822 } else if (!skb->xmit_more &&
2823 !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2824 tx_ring->queue_index)) &&
2825 (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
2826 (tx_ring->packet_stride < WB_STRIDE) &&
2827 (desc_count < WB_STRIDE)) {
2828 tx_ring->packet_stride++;
2829 } else {
2830 tx_ring->packet_stride = 0;
2831 tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2832 do_rs = true;
2833 }
2834 if (do_rs)
2835 tx_ring->packet_stride = 0;
2836
2837 tx_desc->cmd_type_offset_bsz =
2838 build_ctob(td_cmd, td_offset, size, td_tag) |
2839 cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
2840 I40E_TX_DESC_CMD_EOP) <<
2841 I40E_TXD_QW1_CMD_SHIFT);
2842
a5e9c572 2843 /* notify HW of packet */
58044743 2844 if (!tail_bump)
489ce7a4 2845 prefetchw(tx_desc + 1);
a5e9c572 2846
58044743
AS
2847 if (tail_bump) {
2848 /* Force memory writes to complete before letting h/w
2849 * know there are new descriptors to fetch. (Only
2850 * applicable for weak-ordered memory model archs,
2851 * such as IA-64).
2852 */
2853 wmb();
2854 writel(i, tx_ring->tail);
2855 }
2856
fd0a05ce
JB
2857 return;
2858
2859dma_error:
a5e9c572 2860 dev_info(tx_ring->dev, "TX DMA map failed\n");
fd0a05ce
JB
2861
2862 /* clear dma mappings for failed tx_bi map */
2863 for (;;) {
2864 tx_bi = &tx_ring->tx_bi[i];
a5e9c572 2865 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
fd0a05ce
JB
2866 if (tx_bi == first)
2867 break;
2868 if (i == 0)
2869 i = tx_ring->count;
2870 i--;
2871 }
2872
fd0a05ce
JB
2873 tx_ring->next_to_use = i;
2874}
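
/* Illustrative sketch (not part of the driver): the tail/RS decision made at
 * the end of i40e_tx_map(), restated as a standalone helper.  When the stack
 * signals xmit_more (and the queue is not stopped) both the tail bump and
 * the RS bit are deferred; otherwise RS is only requested once a packet
 * spans WB_STRIDE descriptors or the per-ring packet_stride counter reaches
 * it.  The helper name is hypothetical and only mirrors the branches above.
 */
static void example_tail_rs(struct i40e_ring *tx_ring, bool xmit_more,
                            bool queue_stopped, u16 desc_count,
                            bool *tail_bump, bool *do_rs)
{
        *tail_bump = true;
        *do_rs = false;

        if (xmit_more && !queue_stopped) {
                /* more frames coming: defer both the doorbell and RS */
                tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
                *tail_bump = false;
        } else if (!xmit_more && !queue_stopped &&
                   !(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET) &&
                   tx_ring->packet_stride < WB_STRIDE &&
                   desc_count < WB_STRIDE) {
                /* small packets in a steady stream: bump tail, skip RS */
                tx_ring->packet_stride++;
        } else {
                /* catch-up case: bump tail and request a descriptor WB */
                tx_ring->packet_stride = 0;
                tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
                *do_rs = true;
        }
}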
2875
fd0a05ce
JB
2876/**
2877 * i40e_xmit_frame_ring - Sends buffer on Tx ring
2878 * @skb: send buffer
2879 * @tx_ring: ring to send buffer on
2880 *
2881 * Returns NETDEV_TX_OK if sent, else an error code
2882 **/
2883static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2884 struct i40e_ring *tx_ring)
2885{
2886 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2887 u32 cd_tunneling = 0, cd_l2tag2 = 0;
2888 struct i40e_tx_buffer *first;
2889 u32 td_offset = 0;
2890 u32 tx_flags = 0;
2891 __be16 protocol;
2892 u32 td_cmd = 0;
2893 u8 hdr_len = 0;
4ec441df 2894 int tso, count;
beb0dff1 2895 int tsyn;
6995b36c 2896
b74118f0
JB
2897 /* prefetch the data, we'll need it later */
2898 prefetch(skb->data);
2899
4ec441df 2900 count = i40e_xmit_descriptor_count(skb);
2d37490b
AD
2901 if (i40e_chk_linearize(skb, count)) {
2902 if (__skb_linearize(skb))
2903 goto out_drop;
5c4654da 2904 count = i40e_txd_use_count(skb->len);
2d37490b
AD
2905 tx_ring->tx_stats.tx_linearize++;
2906 }
4ec441df
AD
2907
2908 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2909 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2910 * + 4 desc gap to avoid the cache line where head is,
2911 * + 1 desc for context descriptor,
2912 * otherwise try next time
2913 */
2914 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2915 tx_ring->tx_stats.tx_busy++;
fd0a05ce 2916 return NETDEV_TX_BUSY;
4ec441df 2917 }
fd0a05ce
JB
2918
2919 /* prepare the xmit flags */
2920 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2921 goto out_drop;
2922
2923 /* obtain protocol of skb */
3d34dd03 2924 protocol = vlan_get_protocol(skb);
fd0a05ce
JB
2925
2926 /* record the location of the first descriptor for this packet */
2927 first = &tx_ring->tx_bi[tx_ring->next_to_use];
2928
2929 /* setup IPv4/IPv6 offloads */
0e2fe46c 2930 if (protocol == htons(ETH_P_IP))
fd0a05ce 2931 tx_flags |= I40E_TX_FLAGS_IPV4;
0e2fe46c 2932 else if (protocol == htons(ETH_P_IPV6))
fd0a05ce
JB
2933 tx_flags |= I40E_TX_FLAGS_IPV6;
2934
9c883bd3 2935 tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss);
fd0a05ce
JB
2936
2937 if (tso < 0)
2938 goto out_drop;
2939 else if (tso)
2940 tx_flags |= I40E_TX_FLAGS_TSO;
2941
3bc67973
AD
2942 /* Always offload the checksum, since it's in the data descriptor */
2943 tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2944 tx_ring, &cd_tunneling);
2945 if (tso < 0)
2946 goto out_drop;
2947
beb0dff1
JK
2948 tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2949
2950 if (tsyn)
2951 tx_flags |= I40E_TX_FLAGS_TSYN;
2952
259afec7
JK
2953 skb_tx_timestamp(skb);
2954
b1941306
AD
2955 /* always enable CRC insertion offload */
2956 td_cmd |= I40E_TX_DESC_CMD_ICRC;
2957
fd0a05ce
JB
2958 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2959 cd_tunneling, cd_l2tag2);
2960
2961 /* Add Flow Director ATR if it's enabled.
2962 *
2963 * NOTE: this must always be directly before the data descriptor.
2964 */
6b037cd4 2965 i40e_atr(tx_ring, skb, tx_flags);
fd0a05ce
JB
2966
2967 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2968 td_cmd, td_offset);
2969
fd0a05ce
JB
2970 return NETDEV_TX_OK;
2971
2972out_drop:
2973 dev_kfree_skb_any(skb);
2974 return NETDEV_TX_OK;
2975}
2976
2977/**
2978 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2979 * @skb: send buffer
2980 * @netdev: network interface device structure
2981 *
2982 * Returns NETDEV_TX_OK if sent, else an error code
2983 **/
2984netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2985{
2986 struct i40e_netdev_priv *np = netdev_priv(netdev);
2987 struct i40e_vsi *vsi = np->vsi;
9f65e15b 2988 struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
fd0a05ce
JB
2989
2990 /* hardware can't handle really short frames, hardware padding works
2991 * beyond this point
2992 */
a94d9e22
AD
2993 if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2994 return NETDEV_TX_OK;
fd0a05ce
JB
2995
2996 return i40e_xmit_frame_ring(skb, tx_ring);
2997}