1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * IBM Power Virtual Ethernet Device Driver
5 * Copyright (C) IBM Corporation, 2003, 2010
7 * Authors: Dave Larson <larson1@us.ibm.com>
8 * Santiago Leon <santil@linux.vnet.ibm.com>
9 * Brian King <brking@linux.vnet.ibm.com>
10 * Robert Jennings <rcj@linux.vnet.ibm.com>
11 * Anton Blanchard <anton@au.ibm.com>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/errno.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/kernel.h>
19 #include <linux/netdevice.h>
20 #include <linux/etherdevice.h>
21 #include <linux/skbuff.h>
22 #include <linux/init.h>
23 #include <linux/interrupt.h>
26 #include <linux/ethtool.h>
29 #include <linux/ipv6.h>
30 #include <linux/slab.h>
31 #include <asm/hvcall.h>
32 #include <linux/atomic.h>
34 #include <asm/iommu.h>
35 #include <asm/firmware.h>
37 #include <net/ip6_checksum.h>
41 static irqreturn_t
ibmveth_interrupt(int irq
, void *dev_instance
);
42 static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter
*adapter
);
43 static unsigned long ibmveth_get_desired_dma(struct vio_dev
*vdev
);
45 static struct kobj_type ktype_veth_pool
;
48 static const char ibmveth_driver_name
[] = "ibmveth";
49 static const char ibmveth_driver_string
[] = "IBM Power Virtual Ethernet Driver";
50 #define ibmveth_driver_version "1.06"
52 MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>");
53 MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver");
54 MODULE_LICENSE("GPL");
55 MODULE_VERSION(ibmveth_driver_version
);
57 static unsigned int tx_copybreak __read_mostly
= 128;
58 module_param(tx_copybreak
, uint
, 0644);
59 MODULE_PARM_DESC(tx_copybreak
,
60 "Maximum size of packet that is copied to a new buffer on transmit");
62 static unsigned int rx_copybreak __read_mostly
= 128;
63 module_param(rx_copybreak
, uint
, 0644);
64 MODULE_PARM_DESC(rx_copybreak
,
65 "Maximum size of packet that is copied to a new buffer on receive");
67 static unsigned int rx_flush __read_mostly
= 0;
68 module_param(rx_flush
, uint
, 0644);
69 MODULE_PARM_DESC(rx_flush
, "Flush receive buffers before use");
71 static bool old_large_send __read_mostly
;
72 module_param(old_large_send
, bool, 0444);
73 MODULE_PARM_DESC(old_large_send
,
74 "Use old large send method on firmware that supports the new method");
77 char name
[ETH_GSTRING_LEN
];
/* Byte offset of statistics field @stat inside struct ibmveth_adapter. */
#define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
/*
 * Read the u64 located @off bytes past the start of object @a.
 * Both arguments and the whole expansion are parenthesized so that an
 * expression passed as @off (or an operator applied to the result) cannot
 * re-associate with the pointer arithmetic.
 */
#define IBMVETH_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + (off))))
84 static struct ibmveth_stat ibmveth_stats
[] = {
85 { "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles
) },
86 { "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem
) },
87 { "replenish_add_buff_failure",
88 IBMVETH_STAT_OFF(replenish_add_buff_failure
) },
89 { "replenish_add_buff_success",
90 IBMVETH_STAT_OFF(replenish_add_buff_success
) },
91 { "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer
) },
92 { "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer
) },
93 { "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed
) },
94 { "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed
) },
95 { "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support
) },
96 { "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support
) },
97 { "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets
) },
98 { "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets
) },
99 { "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support
) }
102 /* simple methods of getting data from the current rxq entry */
103 static inline u32
ibmveth_rxq_flags(struct ibmveth_adapter
*adapter
)
105 return be32_to_cpu(adapter
->rx_queue
.queue_addr
[adapter
->rx_queue
.index
].flags_off
);
108 static inline int ibmveth_rxq_toggle(struct ibmveth_adapter
*adapter
)
110 return (ibmveth_rxq_flags(adapter
) & IBMVETH_RXQ_TOGGLE
) >>
111 IBMVETH_RXQ_TOGGLE_SHIFT
;
114 static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter
*adapter
)
116 return ibmveth_rxq_toggle(adapter
) == adapter
->rx_queue
.toggle
;
119 static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter
*adapter
)
121 return ibmveth_rxq_flags(adapter
) & IBMVETH_RXQ_VALID
;
124 static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter
*adapter
)
126 return ibmveth_rxq_flags(adapter
) & IBMVETH_RXQ_OFF_MASK
;
129 static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter
*adapter
)
131 return ibmveth_rxq_flags(adapter
) & IBMVETH_RXQ_LRG_PKT
;
134 static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter
*adapter
)
136 return be32_to_cpu(adapter
->rx_queue
.queue_addr
[adapter
->rx_queue
.index
].length
);
139 static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter
*adapter
)
141 return ibmveth_rxq_flags(adapter
) & IBMVETH_RXQ_CSUM_GOOD
;
144 /* setup the initial settings for a buffer pool */
145 static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool
*pool
,
146 u32 pool_index
, u32 pool_size
,
147 u32 buff_size
, u32 pool_active
)
149 pool
->size
= pool_size
;
150 pool
->index
= pool_index
;
151 pool
->buff_size
= buff_size
;
152 pool
->threshold
= pool_size
* 7 / 8;
153 pool
->active
= pool_active
;
156 /* allocate and set up a buffer pool - called during open */
157 static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool
*pool
)
161 pool
->free_map
= kmalloc_array(pool
->size
, sizeof(u16
), GFP_KERNEL
);
166 pool
->dma_addr
= kcalloc(pool
->size
, sizeof(dma_addr_t
), GFP_KERNEL
);
167 if (!pool
->dma_addr
) {
168 kfree(pool
->free_map
);
169 pool
->free_map
= NULL
;
173 pool
->skbuff
= kcalloc(pool
->size
, sizeof(void *), GFP_KERNEL
);
176 kfree(pool
->dma_addr
);
177 pool
->dma_addr
= NULL
;
179 kfree(pool
->free_map
);
180 pool
->free_map
= NULL
;
184 for (i
= 0; i
< pool
->size
; ++i
)
185 pool
->free_map
[i
] = i
;
187 atomic_set(&pool
->available
, 0);
188 pool
->producer_index
= 0;
189 pool
->consumer_index
= 0;
194 static inline void ibmveth_flush_buffer(void *addr
, unsigned long length
)
196 unsigned long offset
;
198 for (offset
= 0; offset
< length
; offset
+= SMP_CACHE_BYTES
)
199 asm("dcbfl %0,%1" :: "b" (addr
), "r" (offset
));
202 /* replenish the buffers for a pool. note that we don't need to
203 * skb_reserve these since they are used for incoming...
205 static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter
*adapter
,
206 struct ibmveth_buff_pool
*pool
)
209 u32 count
= pool
->size
- atomic_read(&pool
->available
);
210 u32 buffers_added
= 0;
212 unsigned int free_index
, index
;
214 unsigned long lpar_rc
;
219 for (i
= 0; i
< count
; ++i
) {
220 union ibmveth_buf_desc desc
;
222 skb
= netdev_alloc_skb(adapter
->netdev
, pool
->buff_size
);
225 netdev_dbg(adapter
->netdev
,
226 "replenish: unable to allocate skb\n");
227 adapter
->replenish_no_mem
++;
231 free_index
= pool
->consumer_index
;
232 pool
->consumer_index
++;
233 if (pool
->consumer_index
>= pool
->size
)
234 pool
->consumer_index
= 0;
235 index
= pool
->free_map
[free_index
];
237 BUG_ON(index
== IBM_VETH_INVALID_MAP
);
238 BUG_ON(pool
->skbuff
[index
] != NULL
);
240 dma_addr
= dma_map_single(&adapter
->vdev
->dev
, skb
->data
,
241 pool
->buff_size
, DMA_FROM_DEVICE
);
243 if (dma_mapping_error(&adapter
->vdev
->dev
, dma_addr
))
246 pool
->free_map
[free_index
] = IBM_VETH_INVALID_MAP
;
247 pool
->dma_addr
[index
] = dma_addr
;
248 pool
->skbuff
[index
] = skb
;
250 correlator
= ((u64
)pool
->index
<< 32) | index
;
251 *(u64
*)skb
->data
= correlator
;
253 desc
.fields
.flags_len
= IBMVETH_BUF_VALID
| pool
->buff_size
;
254 desc
.fields
.address
= dma_addr
;
257 unsigned int len
= min(pool
->buff_size
,
258 adapter
->netdev
->mtu
+
260 ibmveth_flush_buffer(skb
->data
, len
);
262 lpar_rc
= h_add_logical_lan_buffer(adapter
->vdev
->unit_address
,
265 if (lpar_rc
!= H_SUCCESS
) {
269 adapter
->replenish_add_buff_success
++;
274 atomic_add(buffers_added
, &(pool
->available
));
278 pool
->free_map
[free_index
] = index
;
279 pool
->skbuff
[index
] = NULL
;
280 if (pool
->consumer_index
== 0)
281 pool
->consumer_index
= pool
->size
- 1;
283 pool
->consumer_index
--;
284 if (!dma_mapping_error(&adapter
->vdev
->dev
, dma_addr
))
285 dma_unmap_single(&adapter
->vdev
->dev
,
286 pool
->dma_addr
[index
], pool
->buff_size
,
288 dev_kfree_skb_any(skb
);
289 adapter
->replenish_add_buff_failure
++;
292 atomic_add(buffers_added
, &(pool
->available
));
296 * The final 8 bytes of the buffer list is a counter of frames dropped
297 * because there was not a buffer in the buffer list capable of holding
300 static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter
*adapter
)
302 __be64
*p
= adapter
->buffer_list_addr
+ 4096 - 8;
304 adapter
->rx_no_buffer
= be64_to_cpup(p
);
307 /* replenish routine */
308 static void ibmveth_replenish_task(struct ibmveth_adapter
*adapter
)
312 adapter
->replenish_task_cycles
++;
314 for (i
= (IBMVETH_NUM_BUFF_POOLS
- 1); i
>= 0; i
--) {
315 struct ibmveth_buff_pool
*pool
= &adapter
->rx_buff_pool
[i
];
318 (atomic_read(&pool
->available
) < pool
->threshold
))
319 ibmveth_replenish_buffer_pool(adapter
, pool
);
322 ibmveth_update_rx_no_buffer(adapter
);
325 /* empty and free a buffer pool - also used to do cleanup in error paths */
326 static void ibmveth_free_buffer_pool(struct ibmveth_adapter
*adapter
,
327 struct ibmveth_buff_pool
*pool
)
331 kfree(pool
->free_map
);
332 pool
->free_map
= NULL
;
334 if (pool
->skbuff
&& pool
->dma_addr
) {
335 for (i
= 0; i
< pool
->size
; ++i
) {
336 struct sk_buff
*skb
= pool
->skbuff
[i
];
338 dma_unmap_single(&adapter
->vdev
->dev
,
342 dev_kfree_skb_any(skb
);
343 pool
->skbuff
[i
] = NULL
;
348 if (pool
->dma_addr
) {
349 kfree(pool
->dma_addr
);
350 pool
->dma_addr
= NULL
;
359 /* remove a buffer from a pool */
360 static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter
*adapter
,
363 unsigned int pool
= correlator
>> 32;
364 unsigned int index
= correlator
& 0xffffffffUL
;
365 unsigned int free_index
;
368 BUG_ON(pool
>= IBMVETH_NUM_BUFF_POOLS
);
369 BUG_ON(index
>= adapter
->rx_buff_pool
[pool
].size
);
371 skb
= adapter
->rx_buff_pool
[pool
].skbuff
[index
];
375 adapter
->rx_buff_pool
[pool
].skbuff
[index
] = NULL
;
377 dma_unmap_single(&adapter
->vdev
->dev
,
378 adapter
->rx_buff_pool
[pool
].dma_addr
[index
],
379 adapter
->rx_buff_pool
[pool
].buff_size
,
382 free_index
= adapter
->rx_buff_pool
[pool
].producer_index
;
383 adapter
->rx_buff_pool
[pool
].producer_index
++;
384 if (adapter
->rx_buff_pool
[pool
].producer_index
>=
385 adapter
->rx_buff_pool
[pool
].size
)
386 adapter
->rx_buff_pool
[pool
].producer_index
= 0;
387 adapter
->rx_buff_pool
[pool
].free_map
[free_index
] = index
;
391 atomic_dec(&(adapter
->rx_buff_pool
[pool
].available
));
394 /* get the current buffer on the rx queue */
395 static inline struct sk_buff
*ibmveth_rxq_get_buffer(struct ibmveth_adapter
*adapter
)
397 u64 correlator
= adapter
->rx_queue
.queue_addr
[adapter
->rx_queue
.index
].correlator
;
398 unsigned int pool
= correlator
>> 32;
399 unsigned int index
= correlator
& 0xffffffffUL
;
401 BUG_ON(pool
>= IBMVETH_NUM_BUFF_POOLS
);
402 BUG_ON(index
>= adapter
->rx_buff_pool
[pool
].size
);
404 return adapter
->rx_buff_pool
[pool
].skbuff
[index
];
407 /* recycle the current buffer on the rx queue */
408 static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter
*adapter
)
410 u32 q_index
= adapter
->rx_queue
.index
;
411 u64 correlator
= adapter
->rx_queue
.queue_addr
[q_index
].correlator
;
412 unsigned int pool
= correlator
>> 32;
413 unsigned int index
= correlator
& 0xffffffffUL
;
414 union ibmveth_buf_desc desc
;
415 unsigned long lpar_rc
;
418 BUG_ON(pool
>= IBMVETH_NUM_BUFF_POOLS
);
419 BUG_ON(index
>= adapter
->rx_buff_pool
[pool
].size
);
421 if (!adapter
->rx_buff_pool
[pool
].active
) {
422 ibmveth_rxq_harvest_buffer(adapter
);
423 ibmveth_free_buffer_pool(adapter
, &adapter
->rx_buff_pool
[pool
]);
427 desc
.fields
.flags_len
= IBMVETH_BUF_VALID
|
428 adapter
->rx_buff_pool
[pool
].buff_size
;
429 desc
.fields
.address
= adapter
->rx_buff_pool
[pool
].dma_addr
[index
];
431 lpar_rc
= h_add_logical_lan_buffer(adapter
->vdev
->unit_address
, desc
.desc
);
433 if (lpar_rc
!= H_SUCCESS
) {
434 netdev_dbg(adapter
->netdev
, "h_add_logical_lan_buffer failed "
435 "during recycle rc=%ld", lpar_rc
);
436 ibmveth_remove_buffer_from_pool(adapter
, adapter
->rx_queue
.queue_addr
[adapter
->rx_queue
.index
].correlator
);
440 if (++adapter
->rx_queue
.index
== adapter
->rx_queue
.num_slots
) {
441 adapter
->rx_queue
.index
= 0;
442 adapter
->rx_queue
.toggle
= !adapter
->rx_queue
.toggle
;
449 static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter
*adapter
)
451 ibmveth_remove_buffer_from_pool(adapter
, adapter
->rx_queue
.queue_addr
[adapter
->rx_queue
.index
].correlator
);
453 if (++adapter
->rx_queue
.index
== adapter
->rx_queue
.num_slots
) {
454 adapter
->rx_queue
.index
= 0;
455 adapter
->rx_queue
.toggle
= !adapter
->rx_queue
.toggle
;
459 static int ibmveth_register_logical_lan(struct ibmveth_adapter
*adapter
,
460 union ibmveth_buf_desc rxq_desc
, u64 mac_address
)
462 int rc
, try_again
= 1;
465 * After a kexec the adapter will still be open, so our attempt to
466 * open it will fail. So if we get a failure we free the adapter and
467 * try again, but only once.
470 rc
= h_register_logical_lan(adapter
->vdev
->unit_address
,
471 adapter
->buffer_list_dma
, rxq_desc
.desc
,
472 adapter
->filter_list_dma
, mac_address
);
474 if (rc
!= H_SUCCESS
&& try_again
) {
476 rc
= h_free_logical_lan(adapter
->vdev
->unit_address
);
477 } while (H_IS_LONG_BUSY(rc
) || (rc
== H_BUSY
));
/*
 * Pack the ETH_ALEN bytes of @mac into one integer.  Each loop iteration
 * shifts the accumulator left by 8 bits and ORs in mac[i], so mac[0] ends up
 * as the most significant of the packed bytes.
 * NOTE(review): the local declarations and the return statement are not
 * visible in this excerpt; presumably the accumulator starts at 0 and is the
 * value returned - confirm against the full file.
 */
486 static u64
ibmveth_encode_mac_addr(u8
*mac
)
491 for (i
= 0; i
< ETH_ALEN
; i
++)
492 encoded
= (encoded
<< 8) | mac
[i
];
497 static int ibmveth_open(struct net_device
*netdev
)
499 struct ibmveth_adapter
*adapter
= netdev_priv(netdev
);
502 unsigned long lpar_rc
;
504 union ibmveth_buf_desc rxq_desc
;
508 netdev_dbg(netdev
, "open starting\n");
510 napi_enable(&adapter
->napi
);
512 for(i
= 0; i
< IBMVETH_NUM_BUFF_POOLS
; i
++)
513 rxq_entries
+= adapter
->rx_buff_pool
[i
].size
;
516 adapter
->buffer_list_addr
= (void*) get_zeroed_page(GFP_KERNEL
);
517 if (!adapter
->buffer_list_addr
) {
518 netdev_err(netdev
, "unable to allocate list pages\n");
522 adapter
->filter_list_addr
= (void*) get_zeroed_page(GFP_KERNEL
);
523 if (!adapter
->filter_list_addr
) {
524 netdev_err(netdev
, "unable to allocate filter pages\n");
525 goto out_free_buffer_list
;
528 dev
= &adapter
->vdev
->dev
;
530 adapter
->rx_queue
.queue_len
= sizeof(struct ibmveth_rx_q_entry
) *
532 adapter
->rx_queue
.queue_addr
=
533 dma_alloc_coherent(dev
, adapter
->rx_queue
.queue_len
,
534 &adapter
->rx_queue
.queue_dma
, GFP_KERNEL
);
535 if (!adapter
->rx_queue
.queue_addr
)
536 goto out_free_filter_list
;
538 adapter
->buffer_list_dma
= dma_map_single(dev
,
539 adapter
->buffer_list_addr
, 4096, DMA_BIDIRECTIONAL
);
540 if (dma_mapping_error(dev
, adapter
->buffer_list_dma
)) {
541 netdev_err(netdev
, "unable to map buffer list pages\n");
542 goto out_free_queue_mem
;
545 adapter
->filter_list_dma
= dma_map_single(dev
,
546 adapter
->filter_list_addr
, 4096, DMA_BIDIRECTIONAL
);
547 if (dma_mapping_error(dev
, adapter
->filter_list_dma
)) {
548 netdev_err(netdev
, "unable to map filter list pages\n");
549 goto out_unmap_buffer_list
;
552 adapter
->rx_queue
.index
= 0;
553 adapter
->rx_queue
.num_slots
= rxq_entries
;
554 adapter
->rx_queue
.toggle
= 1;
556 mac_address
= ibmveth_encode_mac_addr(netdev
->dev_addr
);
558 rxq_desc
.fields
.flags_len
= IBMVETH_BUF_VALID
|
559 adapter
->rx_queue
.queue_len
;
560 rxq_desc
.fields
.address
= adapter
->rx_queue
.queue_dma
;
562 netdev_dbg(netdev
, "buffer list @ 0x%p\n", adapter
->buffer_list_addr
);
563 netdev_dbg(netdev
, "filter list @ 0x%p\n", adapter
->filter_list_addr
);
564 netdev_dbg(netdev
, "receive q @ 0x%p\n", adapter
->rx_queue
.queue_addr
);
566 h_vio_signal(adapter
->vdev
->unit_address
, VIO_IRQ_DISABLE
);
568 lpar_rc
= ibmveth_register_logical_lan(adapter
, rxq_desc
, mac_address
);
570 if (lpar_rc
!= H_SUCCESS
) {
571 netdev_err(netdev
, "h_register_logical_lan failed with %ld\n",
573 netdev_err(netdev
, "buffer TCE:0x%llx filter TCE:0x%llx rxq "
574 "desc:0x%llx MAC:0x%llx\n",
575 adapter
->buffer_list_dma
,
576 adapter
->filter_list_dma
,
580 goto out_unmap_filter_list
;
583 for (i
= 0; i
< IBMVETH_NUM_BUFF_POOLS
; i
++) {
584 if (!adapter
->rx_buff_pool
[i
].active
)
586 if (ibmveth_alloc_buffer_pool(&adapter
->rx_buff_pool
[i
])) {
587 netdev_err(netdev
, "unable to alloc pool\n");
588 adapter
->rx_buff_pool
[i
].active
= 0;
590 goto out_free_buffer_pools
;
594 netdev_dbg(netdev
, "registering irq 0x%x\n", netdev
->irq
);
595 rc
= request_irq(netdev
->irq
, ibmveth_interrupt
, 0, netdev
->name
,
598 netdev_err(netdev
, "unable to request irq 0x%x, rc %d\n",
601 lpar_rc
= h_free_logical_lan(adapter
->vdev
->unit_address
);
602 } while (H_IS_LONG_BUSY(lpar_rc
) || (lpar_rc
== H_BUSY
));
604 goto out_free_buffer_pools
;
608 adapter
->bounce_buffer
=
609 kmalloc(netdev
->mtu
+ IBMVETH_BUFF_OH
, GFP_KERNEL
);
610 if (!adapter
->bounce_buffer
)
613 adapter
->bounce_buffer_dma
=
614 dma_map_single(&adapter
->vdev
->dev
, adapter
->bounce_buffer
,
615 netdev
->mtu
+ IBMVETH_BUFF_OH
, DMA_BIDIRECTIONAL
);
616 if (dma_mapping_error(dev
, adapter
->bounce_buffer_dma
)) {
617 netdev_err(netdev
, "unable to map bounce buffer\n");
618 goto out_free_bounce_buffer
;
621 netdev_dbg(netdev
, "initial replenish cycle\n");
622 ibmveth_interrupt(netdev
->irq
, netdev
);
624 netif_start_queue(netdev
);
626 netdev_dbg(netdev
, "open complete\n");
630 out_free_bounce_buffer
:
631 kfree(adapter
->bounce_buffer
);
633 free_irq(netdev
->irq
, netdev
);
634 out_free_buffer_pools
:
636 if (adapter
->rx_buff_pool
[i
].active
)
637 ibmveth_free_buffer_pool(adapter
,
638 &adapter
->rx_buff_pool
[i
]);
640 out_unmap_filter_list
:
641 dma_unmap_single(dev
, adapter
->filter_list_dma
, 4096,
643 out_unmap_buffer_list
:
644 dma_unmap_single(dev
, adapter
->buffer_list_dma
, 4096,
647 dma_free_coherent(dev
, adapter
->rx_queue
.queue_len
,
648 adapter
->rx_queue
.queue_addr
,
649 adapter
->rx_queue
.queue_dma
);
650 out_free_filter_list
:
651 free_page((unsigned long)adapter
->filter_list_addr
);
652 out_free_buffer_list
:
653 free_page((unsigned long)adapter
->buffer_list_addr
);
655 napi_disable(&adapter
->napi
);
659 static int ibmveth_close(struct net_device
*netdev
)
661 struct ibmveth_adapter
*adapter
= netdev_priv(netdev
);
662 struct device
*dev
= &adapter
->vdev
->dev
;
666 netdev_dbg(netdev
, "close starting\n");
668 napi_disable(&adapter
->napi
);
670 if (!adapter
->pool_config
)
671 netif_stop_queue(netdev
);
673 h_vio_signal(adapter
->vdev
->unit_address
, VIO_IRQ_DISABLE
);
676 lpar_rc
= h_free_logical_lan(adapter
->vdev
->unit_address
);
677 } while (H_IS_LONG_BUSY(lpar_rc
) || (lpar_rc
== H_BUSY
));
679 if (lpar_rc
!= H_SUCCESS
) {
680 netdev_err(netdev
, "h_free_logical_lan failed with %lx, "
681 "continuing with close\n", lpar_rc
);
684 free_irq(netdev
->irq
, netdev
);
686 ibmveth_update_rx_no_buffer(adapter
);
688 dma_unmap_single(dev
, adapter
->buffer_list_dma
, 4096,
690 free_page((unsigned long)adapter
->buffer_list_addr
);
692 dma_unmap_single(dev
, adapter
->filter_list_dma
, 4096,
694 free_page((unsigned long)adapter
->filter_list_addr
);
696 dma_free_coherent(dev
, adapter
->rx_queue
.queue_len
,
697 adapter
->rx_queue
.queue_addr
,
698 adapter
->rx_queue
.queue_dma
);
700 for (i
= 0; i
< IBMVETH_NUM_BUFF_POOLS
; i
++)
701 if (adapter
->rx_buff_pool
[i
].active
)
702 ibmveth_free_buffer_pool(adapter
,
703 &adapter
->rx_buff_pool
[i
]);
705 dma_unmap_single(&adapter
->vdev
->dev
, adapter
->bounce_buffer_dma
,
706 adapter
->netdev
->mtu
+ IBMVETH_BUFF_OH
,
708 kfree(adapter
->bounce_buffer
);
710 netdev_dbg(netdev
, "close complete\n");
715 static int ibmveth_set_link_ksettings(struct net_device
*dev
,
716 const struct ethtool_link_ksettings
*cmd
)
718 struct ibmveth_adapter
*adapter
= netdev_priv(dev
);
720 return ethtool_virtdev_set_link_ksettings(dev
, cmd
,
/*
 * ethtool get_link_ksettings handler.
 * Copies the speed and duplex stored in the adapter private data into
 * cmd->base and reports the port type as PORT_OTHER.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
725 static int ibmveth_get_link_ksettings(struct net_device
*dev
,
726 struct ethtool_link_ksettings
*cmd
)
728 struct ibmveth_adapter
*adapter
= netdev_priv(dev
);
730 cmd
->base
.speed
= adapter
->speed
;
731 cmd
->base
.duplex
= adapter
->duplex
;
732 cmd
->base
.port
= PORT_OTHER
;
737 static void ibmveth_init_link_settings(struct net_device
*dev
)
739 struct ibmveth_adapter
*adapter
= netdev_priv(dev
);
741 adapter
->speed
= SPEED_1000
;
742 adapter
->duplex
= DUPLEX_FULL
;
745 static void netdev_get_drvinfo(struct net_device
*dev
,
746 struct ethtool_drvinfo
*info
)
748 strlcpy(info
->driver
, ibmveth_driver_name
, sizeof(info
->driver
));
749 strlcpy(info
->version
, ibmveth_driver_version
, sizeof(info
->version
));
752 static netdev_features_t
ibmveth_fix_features(struct net_device
*dev
,
753 netdev_features_t features
)
756 * Since the ibmveth firmware interface does not have the
757 * concept of separate tx/rx checksum offload enable, if rx
758 * checksum is disabled we also have to disable tx checksum
759 * offload. Once we disable rx checksum offload, we are no
760 * longer allowed to send tx buffers that are not properly
764 if (!(features
& NETIF_F_RXCSUM
))
765 features
&= ~NETIF_F_CSUM_MASK
;
770 static int ibmveth_set_csum_offload(struct net_device
*dev
, u32 data
)
772 struct ibmveth_adapter
*adapter
= netdev_priv(dev
);
773 unsigned long set_attr
, clr_attr
, ret_attr
;
774 unsigned long set_attr6
, clr_attr6
;
775 long ret
, ret4
, ret6
;
776 int rc1
= 0, rc2
= 0;
779 if (netif_running(dev
)) {
781 adapter
->pool_config
= 1;
783 adapter
->pool_config
= 0;
792 set_attr
= IBMVETH_ILLAN_IPV4_TCP_CSUM
;
793 set_attr6
= IBMVETH_ILLAN_IPV6_TCP_CSUM
;
795 clr_attr
= IBMVETH_ILLAN_IPV4_TCP_CSUM
;
796 clr_attr6
= IBMVETH_ILLAN_IPV6_TCP_CSUM
;
799 ret
= h_illan_attributes(adapter
->vdev
->unit_address
, 0, 0, &ret_attr
);
801 if (ret
== H_SUCCESS
&&
802 (ret_attr
& IBMVETH_ILLAN_PADDED_PKT_CSUM
)) {
803 ret4
= h_illan_attributes(adapter
->vdev
->unit_address
, clr_attr
,
804 set_attr
, &ret_attr
);
806 if (ret4
!= H_SUCCESS
) {
807 netdev_err(dev
, "unable to change IPv4 checksum "
808 "offload settings. %d rc=%ld\n",
811 h_illan_attributes(adapter
->vdev
->unit_address
,
812 set_attr
, clr_attr
, &ret_attr
);
815 dev
->features
&= ~NETIF_F_IP_CSUM
;
818 adapter
->fw_ipv4_csum_support
= data
;
821 ret6
= h_illan_attributes(adapter
->vdev
->unit_address
,
822 clr_attr6
, set_attr6
, &ret_attr
);
824 if (ret6
!= H_SUCCESS
) {
825 netdev_err(dev
, "unable to change IPv6 checksum "
826 "offload settings. %d rc=%ld\n",
829 h_illan_attributes(adapter
->vdev
->unit_address
,
830 set_attr6
, clr_attr6
, &ret_attr
);
833 dev
->features
&= ~NETIF_F_IPV6_CSUM
;
836 adapter
->fw_ipv6_csum_support
= data
;
838 if (ret4
== H_SUCCESS
|| ret6
== H_SUCCESS
)
839 adapter
->rx_csum
= data
;
844 netdev_err(dev
, "unable to change checksum offload settings."
845 " %d rc=%ld ret_attr=%lx\n", data
, ret
,
850 rc2
= ibmveth_open(dev
);
852 return rc1
? rc1
: rc2
;
855 static int ibmveth_set_tso(struct net_device
*dev
, u32 data
)
857 struct ibmveth_adapter
*adapter
= netdev_priv(dev
);
858 unsigned long set_attr
, clr_attr
, ret_attr
;
860 int rc1
= 0, rc2
= 0;
863 if (netif_running(dev
)) {
865 adapter
->pool_config
= 1;
867 adapter
->pool_config
= 0;
874 set_attr
= IBMVETH_ILLAN_LRG_SR_ENABLED
;
876 clr_attr
= IBMVETH_ILLAN_LRG_SR_ENABLED
;
878 ret1
= h_illan_attributes(adapter
->vdev
->unit_address
, 0, 0, &ret_attr
);
880 if (ret1
== H_SUCCESS
&& (ret_attr
& IBMVETH_ILLAN_LRG_SND_SUPPORT
) &&
882 ret2
= h_illan_attributes(adapter
->vdev
->unit_address
, clr_attr
,
883 set_attr
, &ret_attr
);
885 if (ret2
!= H_SUCCESS
) {
886 netdev_err(dev
, "unable to change tso settings. %d rc=%ld\n",
889 h_illan_attributes(adapter
->vdev
->unit_address
,
890 set_attr
, clr_attr
, &ret_attr
);
893 dev
->features
&= ~(NETIF_F_TSO
| NETIF_F_TSO6
);
897 adapter
->fw_large_send_support
= data
;
898 adapter
->large_send
= data
;
901 /* Older firmware version of large send offload does not
905 dev
->features
&= ~NETIF_F_TSO6
;
906 netdev_info(dev
, "TSO feature requires all partitions to have updated driver");
908 adapter
->large_send
= data
;
912 rc2
= ibmveth_open(dev
);
914 return rc1
? rc1
: rc2
;
917 static int ibmveth_set_features(struct net_device
*dev
,
918 netdev_features_t features
)
920 struct ibmveth_adapter
*adapter
= netdev_priv(dev
);
921 int rx_csum
= !!(features
& NETIF_F_RXCSUM
);
922 int large_send
= !!(features
& (NETIF_F_TSO
| NETIF_F_TSO6
));
923 int rc1
= 0, rc2
= 0;
925 if (rx_csum
!= adapter
->rx_csum
) {
926 rc1
= ibmveth_set_csum_offload(dev
, rx_csum
);
927 if (rc1
&& !adapter
->rx_csum
)
929 features
& ~(NETIF_F_CSUM_MASK
|
933 if (large_send
!= adapter
->large_send
) {
934 rc2
= ibmveth_set_tso(dev
, large_send
);
935 if (rc2
&& !adapter
->large_send
)
937 features
& ~(NETIF_F_TSO
| NETIF_F_TSO6
);
940 return rc1
? rc1
: rc2
;
/*
 * ethtool get_strings handler.
 * For each entry of ibmveth_stats, copies ETH_GSTRING_LEN bytes of the
 * entry's name into @data and advances @data by ETH_GSTRING_LEN.
 * NOTE(review): the statement guarded by the stringset != ETH_SS_STATS test
 * and the declaration of i are not visible in this excerpt; presumably other
 * string sets are ignored - confirm against the full file.
 */
943 static void ibmveth_get_strings(struct net_device
*dev
, u32 stringset
, u8
*data
)
947 if (stringset
!= ETH_SS_STATS
)
950 for (i
= 0; i
< ARRAY_SIZE(ibmveth_stats
); i
++, data
+= ETH_GSTRING_LEN
)
951 memcpy(data
, ibmveth_stats
[i
].name
, ETH_GSTRING_LEN
);
954 static int ibmveth_get_sset_count(struct net_device
*dev
, int sset
)
958 return ARRAY_SIZE(ibmveth_stats
);
/*
 * ethtool get_ethtool_stats handler.
 * For every entry of ibmveth_stats, reads the u64 stored at that entry's
 * offset inside the adapter private data (via IBMVETH_GET_STAT) and stores
 * it in data[i], in table order.
 * NOTE(review): the declaration of i is not visible in this excerpt.
 */
964 static void ibmveth_get_ethtool_stats(struct net_device
*dev
,
965 struct ethtool_stats
*stats
, u64
*data
)
968 struct ibmveth_adapter
*adapter
= netdev_priv(dev
);
970 for (i
= 0; i
< ARRAY_SIZE(ibmveth_stats
); i
++)
971 data
[i
] = IBMVETH_GET_STAT(adapter
, ibmveth_stats
[i
].offset
);
974 static const struct ethtool_ops netdev_ethtool_ops
= {
975 .get_drvinfo
= netdev_get_drvinfo
,
976 .get_link
= ethtool_op_get_link
,
977 .get_strings
= ibmveth_get_strings
,
978 .get_sset_count
= ibmveth_get_sset_count
,
979 .get_ethtool_stats
= ibmveth_get_ethtool_stats
,
980 .get_link_ksettings
= ibmveth_get_link_ksettings
,
981 .set_link_ksettings
= ibmveth_set_link_ksettings
,
984 static int ibmveth_ioctl(struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
989 static int ibmveth_send(struct ibmveth_adapter
*adapter
,
990 union ibmveth_buf_desc
*descs
, unsigned long mss
)
992 unsigned long correlator
;
993 unsigned int retry_count
;
997 * The retry count sets a maximum for the number of broadcast and
998 * multicast destinations within the system.
1003 ret
= h_send_logical_lan(adapter
->vdev
->unit_address
,
1004 descs
[0].desc
, descs
[1].desc
,
1005 descs
[2].desc
, descs
[3].desc
,
1006 descs
[4].desc
, descs
[5].desc
,
1007 correlator
, &correlator
, mss
,
1008 adapter
->fw_large_send_support
);
1009 } while ((ret
== H_BUSY
) && (retry_count
--));
1011 if (ret
!= H_SUCCESS
&& ret
!= H_DROPPED
) {
1012 netdev_err(adapter
->netdev
, "tx: h_send_logical_lan failed "
1013 "with rc=%ld\n", ret
);
1020 static int ibmveth_is_packet_unsupported(struct sk_buff
*skb
,
1021 struct net_device
*netdev
)
1023 struct ethhdr
*ether_header
;
1026 ether_header
= eth_hdr(skb
);
1028 if (ether_addr_equal(ether_header
->h_dest
, netdev
->dev_addr
)) {
1029 netdev_dbg(netdev
, "veth doesn't support loopback packets, dropping packet.\n");
1030 netdev
->stats
.tx_dropped
++;
1037 static netdev_tx_t
ibmveth_start_xmit(struct sk_buff
*skb
,
1038 struct net_device
*netdev
)
1040 struct ibmveth_adapter
*adapter
= netdev_priv(netdev
);
1041 unsigned int desc_flags
;
1042 union ibmveth_buf_desc descs
[6];
1044 int force_bounce
= 0;
1045 dma_addr_t dma_addr
;
1046 unsigned long mss
= 0;
1048 if (ibmveth_is_packet_unsupported(skb
, netdev
))
1051 /* veth doesn't handle frag_list, so linearize the skb.
1052 * When GRO is enabled SKB's can have frag_list.
1054 if (adapter
->is_active_trunk
&&
1055 skb_has_frag_list(skb
) && __skb_linearize(skb
)) {
1056 netdev
->stats
.tx_dropped
++;
1061 * veth handles a maximum of 6 segments including the header, so
1062 * we have to linearize the skb if there are more than this.
1064 if (skb_shinfo(skb
)->nr_frags
> 5 && __skb_linearize(skb
)) {
1065 netdev
->stats
.tx_dropped
++;
1069 /* veth can't checksum offload UDP */
1070 if (skb
->ip_summed
== CHECKSUM_PARTIAL
&&
1071 ((skb
->protocol
== htons(ETH_P_IP
) &&
1072 ip_hdr(skb
)->protocol
!= IPPROTO_TCP
) ||
1073 (skb
->protocol
== htons(ETH_P_IPV6
) &&
1074 ipv6_hdr(skb
)->nexthdr
!= IPPROTO_TCP
)) &&
1075 skb_checksum_help(skb
)) {
1077 netdev_err(netdev
, "tx: failed to checksum packet\n");
1078 netdev
->stats
.tx_dropped
++;
1082 desc_flags
= IBMVETH_BUF_VALID
;
1084 if (skb
->ip_summed
== CHECKSUM_PARTIAL
) {
1085 unsigned char *buf
= skb_transport_header(skb
) +
1088 desc_flags
|= (IBMVETH_BUF_NO_CSUM
| IBMVETH_BUF_CSUM_GOOD
);
1090 /* Need to zero out the checksum */
1094 if (skb_is_gso(skb
) && adapter
->fw_large_send_support
)
1095 desc_flags
|= IBMVETH_BUF_LRG_SND
;
1099 memset(descs
, 0, sizeof(descs
));
1102 * If a linear packet is below the rx threshold then
1103 * copy it into the static bounce buffer. This avoids the
1104 * cost of a TCE insert and remove.
1106 if (force_bounce
|| (!skb_is_nonlinear(skb
) &&
1107 (skb
->len
< tx_copybreak
))) {
1108 skb_copy_from_linear_data(skb
, adapter
->bounce_buffer
,
1111 descs
[0].fields
.flags_len
= desc_flags
| skb
->len
;
1112 descs
[0].fields
.address
= adapter
->bounce_buffer_dma
;
1114 if (ibmveth_send(adapter
, descs
, 0)) {
1115 adapter
->tx_send_failed
++;
1116 netdev
->stats
.tx_dropped
++;
1118 netdev
->stats
.tx_packets
++;
1119 netdev
->stats
.tx_bytes
+= skb
->len
;
1125 /* Map the header */
1126 dma_addr
= dma_map_single(&adapter
->vdev
->dev
, skb
->data
,
1127 skb_headlen(skb
), DMA_TO_DEVICE
);
1128 if (dma_mapping_error(&adapter
->vdev
->dev
, dma_addr
))
1131 descs
[0].fields
.flags_len
= desc_flags
| skb_headlen(skb
);
1132 descs
[0].fields
.address
= dma_addr
;
1135 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
1136 const skb_frag_t
*frag
= &skb_shinfo(skb
)->frags
[i
];
1138 dma_addr
= skb_frag_dma_map(&adapter
->vdev
->dev
, frag
, 0,
1139 skb_frag_size(frag
), DMA_TO_DEVICE
);
1141 if (dma_mapping_error(&adapter
->vdev
->dev
, dma_addr
))
1142 goto map_failed_frags
;
1144 descs
[i
+1].fields
.flags_len
= desc_flags
| skb_frag_size(frag
);
1145 descs
[i
+1].fields
.address
= dma_addr
;
1148 if (skb
->ip_summed
== CHECKSUM_PARTIAL
&& skb_is_gso(skb
)) {
1149 if (adapter
->fw_large_send_support
) {
1150 mss
= (unsigned long)skb_shinfo(skb
)->gso_size
;
1151 adapter
->tx_large_packets
++;
1152 } else if (!skb_is_gso_v6(skb
)) {
1153 /* Put -1 in the IP checksum to tell phyp it
1154 * is a largesend packet. Put the mss in
1157 ip_hdr(skb
)->check
= 0xffff;
1158 tcp_hdr(skb
)->check
=
1159 cpu_to_be16(skb_shinfo(skb
)->gso_size
);
1160 adapter
->tx_large_packets
++;
1164 if (ibmveth_send(adapter
, descs
, mss
)) {
1165 adapter
->tx_send_failed
++;
1166 netdev
->stats
.tx_dropped
++;
1168 netdev
->stats
.tx_packets
++;
1169 netdev
->stats
.tx_bytes
+= skb
->len
;
1172 dma_unmap_single(&adapter
->vdev
->dev
,
1173 descs
[0].fields
.address
,
1174 descs
[0].fields
.flags_len
& IBMVETH_BUF_LEN_MASK
,
1177 for (i
= 1; i
< skb_shinfo(skb
)->nr_frags
+ 1; i
++)
1178 dma_unmap_page(&adapter
->vdev
->dev
, descs
[i
].fields
.address
,
1179 descs
[i
].fields
.flags_len
& IBMVETH_BUF_LEN_MASK
,
1183 dev_consume_skb_any(skb
);
1184 return NETDEV_TX_OK
;
1188 for (i
= 1; i
< last
; i
++)
1189 dma_unmap_page(&adapter
->vdev
->dev
, descs
[i
].fields
.address
,
1190 descs
[i
].fields
.flags_len
& IBMVETH_BUF_LEN_MASK
,
1193 dma_unmap_single(&adapter
->vdev
->dev
,
1194 descs
[0].fields
.address
,
1195 descs
[0].fields
.flags_len
& IBMVETH_BUF_LEN_MASK
,
1198 if (!firmware_has_feature(FW_FEATURE_CMO
))
1199 netdev_err(netdev
, "tx: unable to map xmit buffer\n");
1200 adapter
->tx_map_failed
++;
1201 if (skb_linearize(skb
)) {
1202 netdev
->stats
.tx_dropped
++;
1209 static void ibmveth_rx_mss_helper(struct sk_buff
*skb
, u16 mss
, int lrg_pkt
)
1211 struct tcphdr
*tcph
;
1215 /* only TCP packets will be aggregated */
1216 if (skb
->protocol
== htons(ETH_P_IP
)) {
1217 struct iphdr
*iph
= (struct iphdr
*)skb
->data
;
1219 if (iph
->protocol
== IPPROTO_TCP
) {
1220 offset
= iph
->ihl
* 4;
1221 skb_shinfo(skb
)->gso_type
= SKB_GSO_TCPV4
;
1225 } else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
1226 struct ipv6hdr
*iph6
= (struct ipv6hdr
*)skb
->data
;
1228 if (iph6
->nexthdr
== IPPROTO_TCP
) {
1229 offset
= sizeof(struct ipv6hdr
);
1230 skb_shinfo(skb
)->gso_type
= SKB_GSO_TCPV6
;
1237 /* if mss is not set through Large Packet bit/mss in rx buffer,
1238 * expect that the mss will be written to the tcp header checksum.
1240 tcph
= (struct tcphdr
*)(skb
->data
+ offset
);
1242 skb_shinfo(skb
)->gso_size
= mss
;
1243 } else if (offset
) {
1244 skb_shinfo(skb
)->gso_size
= ntohs(tcph
->check
);
1248 if (skb_shinfo(skb
)->gso_size
) {
1249 hdr_len
= offset
+ tcph
->doff
* 4;
1250 skb_shinfo(skb
)->gso_segs
=
1251 DIV_ROUND_UP(skb
->len
- hdr_len
,
1252 skb_shinfo(skb
)->gso_size
);
1256 static void ibmveth_rx_csum_helper(struct sk_buff
*skb
,
1257 struct ibmveth_adapter
*adapter
)
1259 struct iphdr
*iph
= NULL
;
1260 struct ipv6hdr
*iph6
= NULL
;
1261 __be16 skb_proto
= 0;
1266 skb_proto
= be16_to_cpu(skb
->protocol
);
1268 if (skb_proto
== ETH_P_IP
) {
1269 iph
= (struct iphdr
*)skb
->data
;
1271 /* If the IP checksum is not offloaded and if the packet
1272 * is large send, the checksum must be rebuilt.
1274 if (iph
->check
== 0xffff) {
1276 iph
->check
= ip_fast_csum((unsigned char *)iph
,
1280 iphlen
= iph
->ihl
* 4;
1281 iph_proto
= iph
->protocol
;
1282 } else if (skb_proto
== ETH_P_IPV6
) {
1283 iph6
= (struct ipv6hdr
*)skb
->data
;
1284 iphlen
= sizeof(struct ipv6hdr
);
1285 iph_proto
= iph6
->nexthdr
;
1288 /* In OVS environment, when a flow is not cached, specifically for a
1289 * new TCP connection, the first packet information is passed up
1290 * the user space for finding a flow. During this process, OVS computes
1291 * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
1293 * Given that we zeroed out TCP checksum field in transmit path
1294 * (refer ibmveth_start_xmit routine) as we set "no checksum bit",
1295 * OVS computed checksum will be incorrect w/o TCP pseudo checksum
1296 * in the packet. This leads to OVS dropping the packet and hence
1297 * TCP retransmissions are seen.
1299 * So, re-compute TCP pseudo header checksum.
1301 if (iph_proto
== IPPROTO_TCP
&& adapter
->is_active_trunk
) {
1302 struct tcphdr
*tcph
= (struct tcphdr
*)(skb
->data
+ iphlen
);
1304 tcphdrlen
= skb
->len
- iphlen
;
1306 /* Recompute TCP pseudo header checksum */
1307 if (skb_proto
== ETH_P_IP
)
1308 tcph
->check
= ~csum_tcpudp_magic(iph
->saddr
,
1309 iph
->daddr
, tcphdrlen
, iph_proto
, 0);
1310 else if (skb_proto
== ETH_P_IPV6
)
1311 tcph
->check
= ~csum_ipv6_magic(&iph6
->saddr
,
1312 &iph6
->daddr
, tcphdrlen
, iph_proto
, 0);
1314 /* Setup SKB fields for checksum offload */
1315 skb_partial_csum_set(skb
, iphlen
,
1316 offsetof(struct tcphdr
, check
));
1317 skb_reset_network_header(skb
);
1321 static int ibmveth_poll(struct napi_struct
*napi
, int budget
)
1323 struct ibmveth_adapter
*adapter
=
1324 container_of(napi
, struct ibmveth_adapter
, napi
);
1325 struct net_device
*netdev
= adapter
->netdev
;
1326 int frames_processed
= 0;
1327 unsigned long lpar_rc
;
1330 while (frames_processed
< budget
) {
1331 if (!ibmveth_rxq_pending_buffer(adapter
))
1335 if (!ibmveth_rxq_buffer_valid(adapter
)) {
1336 wmb(); /* suggested by larson1 */
1337 adapter
->rx_invalid_buffer
++;
1338 netdev_dbg(netdev
, "recycling invalid buffer\n");
1339 ibmveth_rxq_recycle_buffer(adapter
);
1341 struct sk_buff
*skb
, *new_skb
;
1342 int length
= ibmveth_rxq_frame_length(adapter
);
1343 int offset
= ibmveth_rxq_frame_offset(adapter
);
1344 int csum_good
= ibmveth_rxq_csum_good(adapter
);
1345 int lrg_pkt
= ibmveth_rxq_large_packet(adapter
);
1346 __sum16 iph_check
= 0;
1348 skb
= ibmveth_rxq_get_buffer(adapter
);
1350 /* if the large packet bit is set in the rx queue
1351 * descriptor, the mss will be written by PHYP eight
1352 * bytes from the start of the rx buffer, which is
1353 * skb->data at this stage
1356 __be64
*rxmss
= (__be64
*)(skb
->data
+ 8);
1358 mss
= (u16
)be64_to_cpu(*rxmss
);
1362 if (length
< rx_copybreak
)
1363 new_skb
= netdev_alloc_skb(netdev
, length
);
1366 skb_copy_to_linear_data(new_skb
,
1370 ibmveth_flush_buffer(skb
->data
,
1372 if (!ibmveth_rxq_recycle_buffer(adapter
))
1376 ibmveth_rxq_harvest_buffer(adapter
);
1377 skb_reserve(skb
, offset
);
1380 skb_put(skb
, length
);
1381 skb
->protocol
= eth_type_trans(skb
, netdev
);
1383 /* PHYP without PLSO support places a -1 in the ip
1384 * checksum for large send frames.
1386 if (skb
->protocol
== cpu_to_be16(ETH_P_IP
)) {
1387 struct iphdr
*iph
= (struct iphdr
*)skb
->data
;
1389 iph_check
= iph
->check
;
1392 if ((length
> netdev
->mtu
+ ETH_HLEN
) ||
1393 lrg_pkt
|| iph_check
== 0xffff) {
1394 ibmveth_rx_mss_helper(skb
, mss
, lrg_pkt
);
1395 adapter
->rx_large_packets
++;
1399 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
1400 ibmveth_rx_csum_helper(skb
, adapter
);
1403 napi_gro_receive(napi
, skb
); /* send it up */
1405 netdev
->stats
.rx_packets
++;
1406 netdev
->stats
.rx_bytes
+= length
;
1411 ibmveth_replenish_task(adapter
);
1413 if (frames_processed
< budget
) {
1414 napi_complete_done(napi
, frames_processed
);
1416 /* We think we are done - reenable interrupts,
1417 * then check once more to make sure we are done.
1419 lpar_rc
= h_vio_signal(adapter
->vdev
->unit_address
,
1422 BUG_ON(lpar_rc
!= H_SUCCESS
);
1424 if (ibmveth_rxq_pending_buffer(adapter
) &&
1425 napi_reschedule(napi
)) {
1426 lpar_rc
= h_vio_signal(adapter
->vdev
->unit_address
,
1431 return frames_processed
;
1434 static irqreturn_t
ibmveth_interrupt(int irq
, void *dev_instance
)
1436 struct net_device
*netdev
= dev_instance
;
1437 struct ibmveth_adapter
*adapter
= netdev_priv(netdev
);
1438 unsigned long lpar_rc
;
1440 if (napi_schedule_prep(&adapter
->napi
)) {
1441 lpar_rc
= h_vio_signal(adapter
->vdev
->unit_address
,
1443 BUG_ON(lpar_rc
!= H_SUCCESS
);
1444 __napi_schedule(&adapter
->napi
);
1449 static void ibmveth_set_multicast_list(struct net_device
*netdev
)
1451 struct ibmveth_adapter
*adapter
= netdev_priv(netdev
);
1452 unsigned long lpar_rc
;
1454 if ((netdev
->flags
& IFF_PROMISC
) ||
1455 (netdev_mc_count(netdev
) > adapter
->mcastFilterSize
)) {
1456 lpar_rc
= h_multicast_ctrl(adapter
->vdev
->unit_address
,
1457 IbmVethMcastEnableRecv
|
1458 IbmVethMcastDisableFiltering
,
1460 if (lpar_rc
!= H_SUCCESS
) {
1461 netdev_err(netdev
, "h_multicast_ctrl rc=%ld when "
1462 "entering promisc mode\n", lpar_rc
);
1465 struct netdev_hw_addr
*ha
;
1466 /* clear the filter table & disable filtering */
1467 lpar_rc
= h_multicast_ctrl(adapter
->vdev
->unit_address
,
1468 IbmVethMcastEnableRecv
|
1469 IbmVethMcastDisableFiltering
|
1470 IbmVethMcastClearFilterTable
,
1472 if (lpar_rc
!= H_SUCCESS
) {
1473 netdev_err(netdev
, "h_multicast_ctrl rc=%ld when "
1474 "attempting to clear filter table\n",
1477 /* add the addresses to the filter table */
1478 netdev_for_each_mc_addr(ha
, netdev
) {
1479 /* add the multicast address to the filter table */
1481 mcast_addr
= ibmveth_encode_mac_addr(ha
->addr
);
1482 lpar_rc
= h_multicast_ctrl(adapter
->vdev
->unit_address
,
1483 IbmVethMcastAddFilter
,
1485 if (lpar_rc
!= H_SUCCESS
) {
1486 netdev_err(netdev
, "h_multicast_ctrl rc=%ld "
1487 "when adding an entry to the filter "
1488 "table\n", lpar_rc
);
1492 /* re-enable filtering */
1493 lpar_rc
= h_multicast_ctrl(adapter
->vdev
->unit_address
,
1494 IbmVethMcastEnableFiltering
,
1496 if (lpar_rc
!= H_SUCCESS
) {
1497 netdev_err(netdev
, "h_multicast_ctrl rc=%ld when "
1498 "enabling filtering\n", lpar_rc
);
1503 static int ibmveth_change_mtu(struct net_device
*dev
, int new_mtu
)
1505 struct ibmveth_adapter
*adapter
= netdev_priv(dev
);
1506 struct vio_dev
*viodev
= adapter
->vdev
;
1507 int new_mtu_oh
= new_mtu
+ IBMVETH_BUFF_OH
;
1509 int need_restart
= 0;
1511 for (i
= 0; i
< IBMVETH_NUM_BUFF_POOLS
; i
++)
1512 if (new_mtu_oh
<= adapter
->rx_buff_pool
[i
].buff_size
)
1515 if (i
== IBMVETH_NUM_BUFF_POOLS
)
1518 /* Deactivate all the buffer pools so that the next loop can activate
1519 only the buffer pools necessary to hold the new MTU */
1520 if (netif_running(adapter
->netdev
)) {
1522 adapter
->pool_config
= 1;
1523 ibmveth_close(adapter
->netdev
);
1524 adapter
->pool_config
= 0;
1527 /* Look for an active buffer pool that can hold the new MTU */
1528 for (i
= 0; i
< IBMVETH_NUM_BUFF_POOLS
; i
++) {
1529 adapter
->rx_buff_pool
[i
].active
= 1;
1531 if (new_mtu_oh
<= adapter
->rx_buff_pool
[i
].buff_size
) {
1533 vio_cmo_set_dev_desired(viodev
,
1534 ibmveth_get_desired_dma
1537 return ibmveth_open(adapter
->netdev
);
1543 if (need_restart
&& (rc
= ibmveth_open(adapter
->netdev
)))
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * ibmveth_poll_controller - netpoll hook
 * @dev: network device to service
 *
 * Replenishes receive buffers, then invokes the interrupt handler directly.
 */
static void ibmveth_poll_controller(struct net_device *dev)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);

	ibmveth_replenish_task(adapter);
	ibmveth_interrupt(dev->irq, dev);
}
#endif
1558 * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
1560 * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
1563 * Number of bytes of IO data the driver will need to perform well.
1565 static unsigned long ibmveth_get_desired_dma(struct vio_dev
*vdev
)
1567 struct net_device
*netdev
= dev_get_drvdata(&vdev
->dev
);
1568 struct ibmveth_adapter
*adapter
;
1569 struct iommu_table
*tbl
;
1574 tbl
= get_iommu_table_base(&vdev
->dev
);
1576 /* netdev inits at probe time along with the structures we need below*/
1578 return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT
, tbl
);
1580 adapter
= netdev_priv(netdev
);
1582 ret
= IBMVETH_BUFF_LIST_SIZE
+ IBMVETH_FILT_LIST_SIZE
;
1583 ret
+= IOMMU_PAGE_ALIGN(netdev
->mtu
, tbl
);
1585 for (i
= 0; i
< IBMVETH_NUM_BUFF_POOLS
; i
++) {
1586 /* add the size of the active receive buffers */
1587 if (adapter
->rx_buff_pool
[i
].active
)
1589 adapter
->rx_buff_pool
[i
].size
*
1590 IOMMU_PAGE_ALIGN(adapter
->rx_buff_pool
[i
].
1592 rxqentries
+= adapter
->rx_buff_pool
[i
].size
;
1594 /* add the size of the receive queue entries */
1595 ret
+= IOMMU_PAGE_ALIGN(
1596 rxqentries
* sizeof(struct ibmveth_rx_q_entry
), tbl
);
1601 static int ibmveth_set_mac_addr(struct net_device
*dev
, void *p
)
1603 struct ibmveth_adapter
*adapter
= netdev_priv(dev
);
1604 struct sockaddr
*addr
= p
;
1608 if (!is_valid_ether_addr(addr
->sa_data
))
1609 return -EADDRNOTAVAIL
;
1611 mac_address
= ibmveth_encode_mac_addr(addr
->sa_data
);
1612 rc
= h_change_logical_lan_mac(adapter
->vdev
->unit_address
, mac_address
);
1614 netdev_err(adapter
->netdev
, "h_change_logical_lan_mac failed with rc=%d\n", rc
);
1618 ether_addr_copy(dev
->dev_addr
, addr
->sa_data
);
1623 static const struct net_device_ops ibmveth_netdev_ops
= {
1624 .ndo_open
= ibmveth_open
,
1625 .ndo_stop
= ibmveth_close
,
1626 .ndo_start_xmit
= ibmveth_start_xmit
,
1627 .ndo_set_rx_mode
= ibmveth_set_multicast_list
,
1628 .ndo_do_ioctl
= ibmveth_ioctl
,
1629 .ndo_change_mtu
= ibmveth_change_mtu
,
1630 .ndo_fix_features
= ibmveth_fix_features
,
1631 .ndo_set_features
= ibmveth_set_features
,
1632 .ndo_validate_addr
= eth_validate_addr
,
1633 .ndo_set_mac_address
= ibmveth_set_mac_addr
,
1634 #ifdef CONFIG_NET_POLL_CONTROLLER
1635 .ndo_poll_controller
= ibmveth_poll_controller
,
1639 static int ibmveth_probe(struct vio_dev
*dev
, const struct vio_device_id
*id
)
1642 struct net_device
*netdev
;
1643 struct ibmveth_adapter
*adapter
;
1644 unsigned char *mac_addr_p
;
1645 __be32
*mcastFilterSize_p
;
1647 unsigned long ret_attr
;
1649 dev_dbg(&dev
->dev
, "entering ibmveth_probe for UA 0x%x\n",
1652 mac_addr_p
= (unsigned char *)vio_get_attribute(dev
, VETH_MAC_ADDR
,
1655 dev_err(&dev
->dev
, "Can't find VETH_MAC_ADDR attribute\n");
1658 /* Workaround for old/broken pHyp */
1661 else if (mac_len
!= 6) {
1662 dev_err(&dev
->dev
, "VETH_MAC_ADDR attribute wrong len %d\n",
1667 mcastFilterSize_p
= (__be32
*)vio_get_attribute(dev
,
1668 VETH_MCAST_FILTER_SIZE
,
1670 if (!mcastFilterSize_p
) {
1671 dev_err(&dev
->dev
, "Can't find VETH_MCAST_FILTER_SIZE "
1676 netdev
= alloc_etherdev(sizeof(struct ibmveth_adapter
));
1681 adapter
= netdev_priv(netdev
);
1682 dev_set_drvdata(&dev
->dev
, netdev
);
1684 adapter
->vdev
= dev
;
1685 adapter
->netdev
= netdev
;
1686 adapter
->mcastFilterSize
= be32_to_cpu(*mcastFilterSize_p
);
1687 adapter
->pool_config
= 0;
1688 ibmveth_init_link_settings(netdev
);
1690 netif_napi_add(netdev
, &adapter
->napi
, ibmveth_poll
, 16);
1692 netdev
->irq
= dev
->irq
;
1693 netdev
->netdev_ops
= &ibmveth_netdev_ops
;
1694 netdev
->ethtool_ops
= &netdev_ethtool_ops
;
1695 SET_NETDEV_DEV(netdev
, &dev
->dev
);
1696 netdev
->hw_features
= NETIF_F_SG
;
1697 if (vio_get_attribute(dev
, "ibm,illan-options", NULL
) != NULL
) {
1698 netdev
->hw_features
|= NETIF_F_IP_CSUM
| NETIF_F_IPV6_CSUM
|
1702 netdev
->features
|= netdev
->hw_features
;
1704 ret
= h_illan_attributes(adapter
->vdev
->unit_address
, 0, 0, &ret_attr
);
1706 /* If running older firmware, TSO should not be enabled by default */
1707 if (ret
== H_SUCCESS
&& (ret_attr
& IBMVETH_ILLAN_LRG_SND_SUPPORT
) &&
1709 netdev
->hw_features
|= NETIF_F_TSO
| NETIF_F_TSO6
;
1710 netdev
->features
|= netdev
->hw_features
;
1712 netdev
->hw_features
|= NETIF_F_TSO
;
1715 adapter
->is_active_trunk
= false;
1716 if (ret
== H_SUCCESS
&& (ret_attr
& IBMVETH_ILLAN_ACTIVE_TRUNK
)) {
1717 adapter
->is_active_trunk
= true;
1718 netdev
->hw_features
|= NETIF_F_FRAGLIST
;
1719 netdev
->features
|= NETIF_F_FRAGLIST
;
1722 netdev
->min_mtu
= IBMVETH_MIN_MTU
;
1723 netdev
->max_mtu
= ETH_MAX_MTU
- IBMVETH_BUFF_OH
;
1725 memcpy(netdev
->dev_addr
, mac_addr_p
, ETH_ALEN
);
1727 if (firmware_has_feature(FW_FEATURE_CMO
))
1728 memcpy(pool_count
, pool_count_cmo
, sizeof(pool_count
));
1730 for (i
= 0; i
< IBMVETH_NUM_BUFF_POOLS
; i
++) {
1731 struct kobject
*kobj
= &adapter
->rx_buff_pool
[i
].kobj
;
1734 ibmveth_init_buffer_pool(&adapter
->rx_buff_pool
[i
], i
,
1735 pool_count
[i
], pool_size
[i
],
1737 error
= kobject_init_and_add(kobj
, &ktype_veth_pool
,
1738 &dev
->dev
.kobj
, "pool%d", i
);
1740 kobject_uevent(kobj
, KOBJ_ADD
);
1743 netdev_dbg(netdev
, "adapter @ 0x%p\n", adapter
);
1744 netdev_dbg(netdev
, "registering netdev...\n");
1746 ibmveth_set_features(netdev
, netdev
->features
);
1748 rc
= register_netdev(netdev
);
1751 netdev_dbg(netdev
, "failed to register netdev rc=%d\n", rc
);
1752 free_netdev(netdev
);
1756 netdev_dbg(netdev
, "registered\n");
1761 static void ibmveth_remove(struct vio_dev
*dev
)
1763 struct net_device
*netdev
= dev_get_drvdata(&dev
->dev
);
1764 struct ibmveth_adapter
*adapter
= netdev_priv(netdev
);
1767 for (i
= 0; i
< IBMVETH_NUM_BUFF_POOLS
; i
++)
1768 kobject_put(&adapter
->rx_buff_pool
[i
].kobj
);
1770 unregister_netdev(netdev
);
1772 free_netdev(netdev
);
1773 dev_set_drvdata(&dev
->dev
, NULL
);
1776 static struct attribute veth_active_attr
;
1777 static struct attribute veth_num_attr
;
1778 static struct attribute veth_size_attr
;
1780 static ssize_t
veth_pool_show(struct kobject
*kobj
,
1781 struct attribute
*attr
, char *buf
)
1783 struct ibmveth_buff_pool
*pool
= container_of(kobj
,
1784 struct ibmveth_buff_pool
,
1787 if (attr
== &veth_active_attr
)
1788 return sprintf(buf
, "%d\n", pool
->active
);
1789 else if (attr
== &veth_num_attr
)
1790 return sprintf(buf
, "%d\n", pool
->size
);
1791 else if (attr
== &veth_size_attr
)
1792 return sprintf(buf
, "%d\n", pool
->buff_size
);
1796 static ssize_t
veth_pool_store(struct kobject
*kobj
, struct attribute
*attr
,
1797 const char *buf
, size_t count
)
1799 struct ibmveth_buff_pool
*pool
= container_of(kobj
,
1800 struct ibmveth_buff_pool
,
1802 struct net_device
*netdev
= dev_get_drvdata(
1803 container_of(kobj
->parent
, struct device
, kobj
));
1804 struct ibmveth_adapter
*adapter
= netdev_priv(netdev
);
1805 long value
= simple_strtol(buf
, NULL
, 10);
1808 if (attr
== &veth_active_attr
) {
1809 if (value
&& !pool
->active
) {
1810 if (netif_running(netdev
)) {
1811 if (ibmveth_alloc_buffer_pool(pool
)) {
1813 "unable to alloc pool\n");
1817 adapter
->pool_config
= 1;
1818 ibmveth_close(netdev
);
1819 adapter
->pool_config
= 0;
1820 if ((rc
= ibmveth_open(netdev
)))
1825 } else if (!value
&& pool
->active
) {
1826 int mtu
= netdev
->mtu
+ IBMVETH_BUFF_OH
;
1828 /* Make sure there is a buffer pool with buffers that
1829 can hold a packet of the size of the MTU */
1830 for (i
= 0; i
< IBMVETH_NUM_BUFF_POOLS
; i
++) {
1831 if (pool
== &adapter
->rx_buff_pool
[i
])
1833 if (!adapter
->rx_buff_pool
[i
].active
)
1835 if (mtu
<= adapter
->rx_buff_pool
[i
].buff_size
)
1839 if (i
== IBMVETH_NUM_BUFF_POOLS
) {
1840 netdev_err(netdev
, "no active pool >= MTU\n");
1844 if (netif_running(netdev
)) {
1845 adapter
->pool_config
= 1;
1846 ibmveth_close(netdev
);
1848 adapter
->pool_config
= 0;
1849 if ((rc
= ibmveth_open(netdev
)))
1854 } else if (attr
== &veth_num_attr
) {
1855 if (value
<= 0 || value
> IBMVETH_MAX_POOL_COUNT
) {
1858 if (netif_running(netdev
)) {
1859 adapter
->pool_config
= 1;
1860 ibmveth_close(netdev
);
1861 adapter
->pool_config
= 0;
1863 if ((rc
= ibmveth_open(netdev
)))
1869 } else if (attr
== &veth_size_attr
) {
1870 if (value
<= IBMVETH_BUFF_OH
|| value
> IBMVETH_MAX_BUF_SIZE
) {
1873 if (netif_running(netdev
)) {
1874 adapter
->pool_config
= 1;
1875 ibmveth_close(netdev
);
1876 adapter
->pool_config
= 0;
1877 pool
->buff_size
= value
;
1878 if ((rc
= ibmveth_open(netdev
)))
1881 pool
->buff_size
= value
;
1886 /* kick the interrupt handler to allocate/deallocate pools */
1887 ibmveth_interrupt(netdev
->irq
, netdev
);
1892 #define ATTR(_name, _mode) \
1893 struct attribute veth_##_name##_attr = { \
1894 .name = __stringify(_name), .mode = _mode, \
1897 static ATTR(active
, 0644);
1898 static ATTR(num
, 0644);
1899 static ATTR(size
, 0644);
1901 static struct attribute
*veth_pool_attrs
[] = {
1908 static const struct sysfs_ops veth_pool_ops
= {
1909 .show
= veth_pool_show
,
1910 .store
= veth_pool_store
,
1913 static struct kobj_type ktype_veth_pool
= {
1915 .sysfs_ops
= &veth_pool_ops
,
1916 .default_attrs
= veth_pool_attrs
,
1919 static int ibmveth_resume(struct device
*dev
)
1921 struct net_device
*netdev
= dev_get_drvdata(dev
);
1922 ibmveth_interrupt(netdev
->irq
, netdev
);
1926 static const struct vio_device_id ibmveth_device_table
[] = {
1927 { "network", "IBM,l-lan"},
1930 MODULE_DEVICE_TABLE(vio
, ibmveth_device_table
);
1932 static const struct dev_pm_ops ibmveth_pm_ops
= {
1933 .resume
= ibmveth_resume
1936 static struct vio_driver ibmveth_driver
= {
1937 .id_table
= ibmveth_device_table
,
1938 .probe
= ibmveth_probe
,
1939 .remove
= ibmveth_remove
,
1940 .get_desired_dma
= ibmveth_get_desired_dma
,
1941 .name
= ibmveth_driver_name
,
1942 .pm
= &ibmveth_pm_ops
,
1945 static int __init
ibmveth_module_init(void)
1947 printk(KERN_DEBUG
"%s: %s %s\n", ibmveth_driver_name
,
1948 ibmveth_driver_string
, ibmveth_driver_version
);
1950 return vio_register_driver(&ibmveth_driver
);
1953 static void __exit
ibmveth_module_exit(void)
1955 vio_unregister_driver(&ibmveth_driver
);
/* hook the entry and exit points into the module loader */
module_init(ibmveth_module_init);
module_exit(ibmveth_module_exit);