/* sunvnet.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 */
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/types.h>
11 #include <linux/slab.h>
12 #include <linux/delay.h>
13 #include <linux/init.h>
14 #include <linux/netdevice.h>
15 #include <linux/ethtool.h>
16 #include <linux/etherdevice.h>
17 #include <linux/mutex.h>
18 #include <linux/highmem.h>
19 #include <linux/if_vlan.h>
21 #if IS_ENABLED(CONFIG_IPV6)
22 #include <linux/icmpv6.h>
27 #include <net/route.h>
34 #define DRV_MODULE_NAME "sunvnet"
35 #define DRV_MODULE_VERSION "1.0"
36 #define DRV_MODULE_RELDATE "June 25, 2007"
38 static char version
[] =
39 DRV_MODULE_NAME
".c:v" DRV_MODULE_VERSION
" (" DRV_MODULE_RELDATE
")\n";
40 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
41 MODULE_DESCRIPTION("Sun LDOM virtual network driver");
42 MODULE_LICENSE("GPL");
43 MODULE_VERSION(DRV_MODULE_VERSION
);
45 #define VNET_MAX_TXQS 16
47 /* Heuristic for the number of times to exponentially backoff and
48 * retry sending an LDC trigger when EAGAIN is encountered
50 #define VNET_MAX_RETRIES 10
52 static int __vnet_tx_trigger(struct vnet_port
*port
, u32 start
);
53 static void vnet_port_reset(struct vnet_port
*port
);
55 /* Ordered from largest major to lowest */
56 static struct vio_version vnet_versions
[] = {
57 { .major
= 1, .minor
= 8 },
58 { .major
= 1, .minor
= 7 },
59 { .major
= 1, .minor
= 6 },
60 { .major
= 1, .minor
= 0 },
63 static inline u32
vnet_tx_dring_avail(struct vio_dring_state
*dr
)
65 return vio_dring_avail(dr
, VNET_TX_RING_SIZE
);
68 static int vnet_handle_unknown(struct vnet_port
*port
, void *arg
)
70 struct vio_msg_tag
*pkt
= arg
;
72 pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
73 pkt
->type
, pkt
->stype
, pkt
->stype_env
, pkt
->sid
);
74 pr_err("Resetting connection\n");
76 ldc_disconnect(port
->vio
.lp
);
81 static int vnet_port_alloc_tx_ring(struct vnet_port
*port
);
83 static int vnet_send_attr(struct vio_driver_state
*vio
)
85 struct vnet_port
*port
= to_vnet_port(vio
);
86 struct net_device
*dev
= port
->vp
->dev
;
87 struct vio_net_attr_info pkt
;
88 int framelen
= ETH_FRAME_LEN
;
91 err
= vnet_port_alloc_tx_ring(to_vnet_port(vio
));
95 memset(&pkt
, 0, sizeof(pkt
));
96 pkt
.tag
.type
= VIO_TYPE_CTRL
;
97 pkt
.tag
.stype
= VIO_SUBTYPE_INFO
;
98 pkt
.tag
.stype_env
= VIO_ATTR_INFO
;
99 pkt
.tag
.sid
= vio_send_sid(vio
);
100 if (vio_version_before(vio
, 1, 2))
101 pkt
.xfer_mode
= VIO_DRING_MODE
;
103 pkt
.xfer_mode
= VIO_NEW_DRING_MODE
;
104 pkt
.addr_type
= VNET_ADDR_ETHERMAC
;
106 for (i
= 0; i
< 6; i
++)
107 pkt
.addr
|= (u64
)dev
->dev_addr
[i
] << ((5 - i
) * 8);
108 if (vio_version_after(vio
, 1, 3)) {
110 port
->rmtu
= min(VNET_MAXPACKET
, port
->rmtu
);
111 pkt
.mtu
= port
->rmtu
;
113 port
->rmtu
= VNET_MAXPACKET
;
114 pkt
.mtu
= port
->rmtu
;
116 if (vio_version_after_eq(vio
, 1, 6))
117 pkt
.options
= VIO_TX_DRING
;
118 } else if (vio_version_before(vio
, 1, 3)) {
121 pkt
.mtu
= framelen
+ VLAN_HLEN
;
125 if (vio_version_after_eq(vio
, 1, 7) && port
->tso
) {
126 pkt
.cflags
|= VNET_LSO_IPV4_CAPAB
;
128 port
->tsolen
= VNET_MAXTSO
;
129 pkt
.ipv4_lso_maxlen
= port
->tsolen
;
132 pkt
.plnk_updt
= PHYSLINK_UPDATE_NONE
;
134 viodbg(HS
, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
135 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
136 "cflags[0x%04x] lso_max[%u]\n",
137 pkt
.xfer_mode
, pkt
.addr_type
,
138 (unsigned long long)pkt
.addr
,
139 pkt
.ack_freq
, pkt
.plnk_updt
, pkt
.options
,
140 (unsigned long long)pkt
.mtu
, pkt
.cflags
, pkt
.ipv4_lso_maxlen
);
143 return vio_ldc_send(vio
, &pkt
, sizeof(pkt
));
146 static int handle_attr_info(struct vio_driver_state
*vio
,
147 struct vio_net_attr_info
*pkt
)
149 struct vnet_port
*port
= to_vnet_port(vio
);
153 viodbg(HS
, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
154 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
155 " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
156 pkt
->xfer_mode
, pkt
->addr_type
,
157 (unsigned long long)pkt
->addr
,
158 pkt
->ack_freq
, pkt
->plnk_updt
, pkt
->options
,
159 (unsigned long long)pkt
->mtu
, port
->rmtu
, pkt
->cflags
,
160 pkt
->ipv4_lso_maxlen
);
162 pkt
->tag
.sid
= vio_send_sid(vio
);
164 xfer_mode
= pkt
->xfer_mode
;
165 /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
166 if (vio_version_before(vio
, 1, 2) && xfer_mode
== VIO_DRING_MODE
)
167 xfer_mode
= VIO_NEW_DRING_MODE
;
170 * < v1.3 - ETH_FRAME_LEN exactly
171 * > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
173 * = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
175 if (vio_version_before(vio
, 1, 3)) {
176 localmtu
= ETH_FRAME_LEN
;
177 } else if (vio_version_after(vio
, 1, 3)) {
178 localmtu
= port
->rmtu
? port
->rmtu
: VNET_MAXPACKET
;
179 localmtu
= min(pkt
->mtu
, localmtu
);
182 localmtu
= ETH_FRAME_LEN
+ VLAN_HLEN
;
184 port
->rmtu
= localmtu
;
186 /* LSO negotiation */
187 if (vio_version_after_eq(vio
, 1, 7))
188 port
->tso
&= !!(pkt
->cflags
& VNET_LSO_IPV4_CAPAB
);
193 port
->tsolen
= VNET_MAXTSO
;
194 port
->tsolen
= min(port
->tsolen
, pkt
->ipv4_lso_maxlen
);
195 if (port
->tsolen
< VNET_MINTSO
) {
198 pkt
->cflags
&= ~VNET_LSO_IPV4_CAPAB
;
200 pkt
->ipv4_lso_maxlen
= port
->tsolen
;
202 pkt
->cflags
&= ~VNET_LSO_IPV4_CAPAB
;
203 pkt
->ipv4_lso_maxlen
= 0;
206 /* for version >= 1.6, ACK packet mode we support */
207 if (vio_version_after_eq(vio
, 1, 6)) {
208 pkt
->xfer_mode
= VIO_NEW_DRING_MODE
;
209 pkt
->options
= VIO_TX_DRING
;
212 if (!(xfer_mode
| VIO_NEW_DRING_MODE
) ||
213 pkt
->addr_type
!= VNET_ADDR_ETHERMAC
||
214 pkt
->mtu
!= localmtu
) {
215 viodbg(HS
, "SEND NET ATTR NACK\n");
217 pkt
->tag
.stype
= VIO_SUBTYPE_NACK
;
219 (void) vio_ldc_send(vio
, pkt
, sizeof(*pkt
));
223 viodbg(HS
, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
224 "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
225 "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
226 pkt
->xfer_mode
, pkt
->addr_type
,
227 (unsigned long long)pkt
->addr
,
228 pkt
->ack_freq
, pkt
->plnk_updt
, pkt
->options
,
229 (unsigned long long)pkt
->mtu
, port
->rmtu
, pkt
->cflags
,
230 pkt
->ipv4_lso_maxlen
);
232 pkt
->tag
.stype
= VIO_SUBTYPE_ACK
;
234 return vio_ldc_send(vio
, pkt
, sizeof(*pkt
));
239 static int handle_attr_ack(struct vio_driver_state
*vio
,
240 struct vio_net_attr_info
*pkt
)
242 viodbg(HS
, "GOT NET ATTR ACK\n");
247 static int handle_attr_nack(struct vio_driver_state
*vio
,
248 struct vio_net_attr_info
*pkt
)
250 viodbg(HS
, "GOT NET ATTR NACK\n");
255 static int vnet_handle_attr(struct vio_driver_state
*vio
, void *arg
)
257 struct vio_net_attr_info
*pkt
= arg
;
259 switch (pkt
->tag
.stype
) {
260 case VIO_SUBTYPE_INFO
:
261 return handle_attr_info(vio
, pkt
);
263 case VIO_SUBTYPE_ACK
:
264 return handle_attr_ack(vio
, pkt
);
266 case VIO_SUBTYPE_NACK
:
267 return handle_attr_nack(vio
, pkt
);
274 static void vnet_handshake_complete(struct vio_driver_state
*vio
)
276 struct vio_dring_state
*dr
;
278 dr
= &vio
->drings
[VIO_DRIVER_RX_RING
];
279 dr
->snd_nxt
= dr
->rcv_nxt
= 1;
281 dr
= &vio
->drings
[VIO_DRIVER_TX_RING
];
282 dr
->snd_nxt
= dr
->rcv_nxt
= 1;
285 /* The hypervisor interface that implements copying to/from imported
286 * memory from another domain requires that copies are done to 8-byte
287 * aligned buffers, and that the lengths of such copies are also 8-byte
290 * So we align skb->data to an 8-byte multiple and pad-out the data
291 * area so we can round the copy length up to the next multiple of
294 * The transmitter puts the actual start of the packet 6 bytes into
295 * the buffer it sends over, so that the IP headers after the ethernet
296 * header are aligned properly. These 6 bytes are not in the descriptor
297 * length, they are simply implied. This offset is represented using
298 * the VNET_PACKET_SKIP macro.
300 static struct sk_buff
*alloc_and_align_skb(struct net_device
*dev
,
303 struct sk_buff
*skb
= netdev_alloc_skb(dev
, len
+VNET_PACKET_SKIP
+8+8);
304 unsigned long addr
, off
;
309 addr
= (unsigned long) skb
->data
;
310 off
= ((addr
+ 7UL) & ~7UL) - addr
;
312 skb_reserve(skb
, off
);
317 static inline void vnet_fullcsum(struct sk_buff
*skb
)
319 struct iphdr
*iph
= ip_hdr(skb
);
320 int offset
= skb_transport_offset(skb
);
322 if (skb
->protocol
!= htons(ETH_P_IP
))
324 if (iph
->protocol
!= IPPROTO_TCP
&&
325 iph
->protocol
!= IPPROTO_UDP
)
327 skb
->ip_summed
= CHECKSUM_NONE
;
330 if (iph
->protocol
== IPPROTO_TCP
) {
331 struct tcphdr
*ptcp
= tcp_hdr(skb
);
334 skb
->csum
= skb_checksum(skb
, offset
, skb
->len
- offset
, 0);
335 ptcp
->check
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
336 skb
->len
- offset
, IPPROTO_TCP
,
338 } else if (iph
->protocol
== IPPROTO_UDP
) {
339 struct udphdr
*pudp
= udp_hdr(skb
);
342 skb
->csum
= skb_checksum(skb
, offset
, skb
->len
- offset
, 0);
343 pudp
->check
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
344 skb
->len
- offset
, IPPROTO_UDP
,
349 static int vnet_rx_one(struct vnet_port
*port
, struct vio_net_desc
*desc
)
351 struct net_device
*dev
= port
->vp
->dev
;
352 unsigned int len
= desc
->size
;
353 unsigned int copy_len
;
359 if (port
->tso
&& port
->tsolen
> port
->rmtu
)
360 maxlen
= port
->tsolen
;
363 if (unlikely(len
< ETH_ZLEN
|| len
> maxlen
)) {
364 dev
->stats
.rx_length_errors
++;
368 skb
= alloc_and_align_skb(dev
, len
);
370 if (unlikely(!skb
)) {
371 dev
->stats
.rx_missed_errors
++;
375 copy_len
= (len
+ VNET_PACKET_SKIP
+ 7U) & ~7U;
376 skb_put(skb
, copy_len
);
377 err
= ldc_copy(port
->vio
.lp
, LDC_COPY_IN
,
378 skb
->data
, copy_len
, 0,
379 desc
->cookies
, desc
->ncookies
);
380 if (unlikely(err
< 0)) {
381 dev
->stats
.rx_frame_errors
++;
385 skb_pull(skb
, VNET_PACKET_SKIP
);
387 skb
->protocol
= eth_type_trans(skb
, dev
);
389 if (vio_version_after_eq(&port
->vio
, 1, 8)) {
390 struct vio_net_dext
*dext
= vio_net_ext(desc
);
392 if (dext
->flags
& VNET_PKT_HCK_IPV4_HDRCKSUM
) {
393 if (skb
->protocol
== ETH_P_IP
) {
394 struct iphdr
*iph
= (struct iphdr
*)skb
->data
;
400 if ((dext
->flags
& VNET_PKT_HCK_FULLCKSUM
) &&
401 skb
->ip_summed
== CHECKSUM_NONE
)
403 if (dext
->flags
& VNET_PKT_HCK_IPV4_HDRCKSUM_OK
) {
404 skb
->ip_summed
= CHECKSUM_PARTIAL
;
406 if (dext
->flags
& VNET_PKT_HCK_FULLCKSUM_OK
)
411 skb
->ip_summed
= port
->switch_port
? CHECKSUM_NONE
: CHECKSUM_PARTIAL
;
413 dev
->stats
.rx_packets
++;
414 dev
->stats
.rx_bytes
+= len
;
415 napi_gro_receive(&port
->napi
, skb
);
422 dev
->stats
.rx_dropped
++;
426 static int vnet_send_ack(struct vnet_port
*port
, struct vio_dring_state
*dr
,
427 u32 start
, u32 end
, u8 vio_dring_state
)
429 struct vio_dring_data hdr
= {
431 .type
= VIO_TYPE_DATA
,
432 .stype
= VIO_SUBTYPE_ACK
,
433 .stype_env
= VIO_DRING_DATA
,
434 .sid
= vio_send_sid(&port
->vio
),
436 .dring_ident
= dr
->ident
,
439 .state
= vio_dring_state
,
444 hdr
.seq
= dr
->snd_nxt
;
447 err
= vio_ldc_send(&port
->vio
, &hdr
, sizeof(hdr
));
453 if ((delay
<<= 1) > 128)
455 if (retries
++ > VNET_MAX_RETRIES
) {
456 pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
457 port
->raddr
[0], port
->raddr
[1],
458 port
->raddr
[2], port
->raddr
[3],
459 port
->raddr
[4], port
->raddr
[5]);
462 } while (err
== -EAGAIN
);
464 if (err
<= 0 && vio_dring_state
== VIO_DRING_STOPPED
) {
465 port
->stop_rx_idx
= end
;
466 port
->stop_rx
= true;
468 port
->stop_rx_idx
= 0;
469 port
->stop_rx
= false;
475 static struct vio_net_desc
*get_rx_desc(struct vnet_port
*port
,
476 struct vio_dring_state
*dr
,
479 struct vio_net_desc
*desc
= port
->vio
.desc_buf
;
482 err
= ldc_get_dring_entry(port
->vio
.lp
, desc
, dr
->entry_size
,
483 (index
* dr
->entry_size
),
484 dr
->cookies
, dr
->ncookies
);
491 static int put_rx_desc(struct vnet_port
*port
,
492 struct vio_dring_state
*dr
,
493 struct vio_net_desc
*desc
,
498 err
= ldc_put_dring_entry(port
->vio
.lp
, desc
, dr
->entry_size
,
499 (index
* dr
->entry_size
),
500 dr
->cookies
, dr
->ncookies
);
507 static int vnet_walk_rx_one(struct vnet_port
*port
,
508 struct vio_dring_state
*dr
,
509 u32 index
, int *needs_ack
)
511 struct vio_net_desc
*desc
= get_rx_desc(port
, dr
, index
);
512 struct vio_driver_state
*vio
= &port
->vio
;
515 BUG_ON(desc
== NULL
);
517 return PTR_ERR(desc
);
519 if (desc
->hdr
.state
!= VIO_DESC_READY
)
524 viodbg(DATA
, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
525 desc
->hdr
.state
, desc
->hdr
.ack
,
526 desc
->size
, desc
->ncookies
,
527 desc
->cookies
[0].cookie_addr
,
528 desc
->cookies
[0].cookie_size
);
530 err
= vnet_rx_one(port
, desc
);
531 if (err
== -ECONNRESET
)
533 desc
->hdr
.state
= VIO_DESC_DONE
;
534 err
= put_rx_desc(port
, dr
, desc
, index
);
537 *needs_ack
= desc
->hdr
.ack
;
541 static int vnet_walk_rx(struct vnet_port
*port
, struct vio_dring_state
*dr
,
542 u32 start
, u32 end
, int *npkts
, int budget
)
544 struct vio_driver_state
*vio
= &port
->vio
;
545 int ack_start
= -1, ack_end
= -1;
546 bool send_ack
= true;
548 end
= (end
== (u32
) -1) ? vio_dring_prev(dr
, start
)
549 : vio_dring_next(dr
, end
);
551 viodbg(DATA
, "vnet_walk_rx start[%08x] end[%08x]\n", start
, end
);
553 while (start
!= end
) {
554 int ack
= 0, err
= vnet_walk_rx_one(port
, dr
, start
, &ack
);
555 if (err
== -ECONNRESET
)
563 start
= vio_dring_next(dr
, start
);
564 if (ack
&& start
!= end
) {
565 err
= vnet_send_ack(port
, dr
, ack_start
, ack_end
,
567 if (err
== -ECONNRESET
)
571 if ((*npkts
) >= budget
) {
576 if (unlikely(ack_start
== -1))
577 ack_start
= ack_end
= vio_dring_prev(dr
, start
);
579 port
->napi_resume
= false;
580 return vnet_send_ack(port
, dr
, ack_start
, ack_end
,
583 port
->napi_resume
= true;
584 port
->napi_stop_idx
= ack_end
;
589 static int vnet_rx(struct vnet_port
*port
, void *msgbuf
, int *npkts
,
592 struct vio_dring_data
*pkt
= msgbuf
;
593 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_RX_RING
];
594 struct vio_driver_state
*vio
= &port
->vio
;
596 viodbg(DATA
, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
597 pkt
->tag
.stype_env
, pkt
->seq
, dr
->rcv_nxt
);
599 if (unlikely(pkt
->tag
.stype_env
!= VIO_DRING_DATA
))
601 if (unlikely(pkt
->seq
!= dr
->rcv_nxt
)) {
602 pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
603 pkt
->seq
, dr
->rcv_nxt
);
607 if (!port
->napi_resume
)
610 /* XXX Validate pkt->start_idx and pkt->end_idx XXX */
612 return vnet_walk_rx(port
, dr
, pkt
->start_idx
, pkt
->end_idx
,
616 static int idx_is_pending(struct vio_dring_state
*dr
, u32 end
)
621 while (idx
!= dr
->prod
) {
626 idx
= vio_dring_next(dr
, idx
);
631 static int vnet_ack(struct vnet_port
*port
, void *msgbuf
)
633 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
634 struct vio_dring_data
*pkt
= msgbuf
;
635 struct net_device
*dev
;
638 struct vio_net_desc
*desc
;
639 struct netdev_queue
*txq
;
641 if (unlikely(pkt
->tag
.stype_env
!= VIO_DRING_DATA
))
648 if (unlikely(!idx_is_pending(dr
, end
))) {
649 netif_tx_unlock(dev
);
653 /* sync for race conditions with vnet_start_xmit() and tell xmit it
654 * is time to send a trigger.
656 dr
->cons
= vio_dring_next(dr
, end
);
657 desc
= vio_dring_entry(dr
, dr
->cons
);
658 if (desc
->hdr
.state
== VIO_DESC_READY
&& !port
->start_cons
) {
659 /* vnet_start_xmit() just populated this dring but missed
660 * sending the "start" LDC message to the consumer.
661 * Send a "start" trigger on its behalf.
663 if (__vnet_tx_trigger(port
, dr
->cons
) > 0)
664 port
->start_cons
= false;
666 port
->start_cons
= true;
668 port
->start_cons
= true;
670 netif_tx_unlock(dev
);
672 txq
= netdev_get_tx_queue(dev
, port
->q_index
);
673 if (unlikely(netif_tx_queue_stopped(txq
) &&
674 vnet_tx_dring_avail(dr
) >= VNET_TX_WAKEUP_THRESH(dr
)))
/* Handle a dring-data NACK.  Currently a no-op that returns 0. */
static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
	/* XXX just reset or similar XXX */
	return 0;
}

686 static int handle_mcast(struct vnet_port
*port
, void *msgbuf
)
688 struct vio_net_mcast_info
*pkt
= msgbuf
;
690 if (pkt
->tag
.stype
!= VIO_SUBTYPE_ACK
)
691 pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
701 /* Got back a STOPPED LDC message on port. If the queue is stopped,
702 * wake it up so that we'll send out another START message at the
705 static void maybe_tx_wakeup(struct vnet_port
*port
)
707 struct netdev_queue
*txq
;
709 txq
= netdev_get_tx_queue(port
->vp
->dev
, port
->q_index
);
710 __netif_tx_lock(txq
, smp_processor_id());
711 if (likely(netif_tx_queue_stopped(txq
))) {
712 struct vio_dring_state
*dr
;
714 dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
715 netif_tx_wake_queue(txq
);
717 __netif_tx_unlock(txq
);
720 static inline bool port_is_up(struct vnet_port
*vnet
)
722 struct vio_driver_state
*vio
= &vnet
->vio
;
724 return !!(vio
->hs_state
& VIO_HS_COMPLETE
);
727 static int vnet_event_napi(struct vnet_port
*port
, int budget
)
729 struct vio_driver_state
*vio
= &port
->vio
;
732 int event
= (port
->rx_event
& LDC_EVENT_RESET
);
735 if (unlikely(event
== LDC_EVENT_RESET
||
736 event
== LDC_EVENT_UP
)) {
737 vio_link_state_change(vio
, event
);
739 if (event
== LDC_EVENT_RESET
) {
740 vnet_port_reset(port
);
746 /* We may have multiple LDC events in rx_event. Unroll send_events() */
747 event
= (port
->rx_event
& LDC_EVENT_UP
);
748 port
->rx_event
&= ~(LDC_EVENT_RESET
|LDC_EVENT_UP
);
749 if (event
== LDC_EVENT_UP
)
751 event
= port
->rx_event
;
752 if (!(event
& LDC_EVENT_DATA_READY
))
755 /* we dont expect any other bits than RESET, UP, DATA_READY */
756 BUG_ON(event
!= LDC_EVENT_DATA_READY
);
761 struct vio_msg_tag tag
;
765 if (port
->napi_resume
) {
766 struct vio_dring_data
*pkt
=
767 (struct vio_dring_data
*)&msgbuf
;
768 struct vio_dring_state
*dr
=
769 &port
->vio
.drings
[VIO_DRIVER_RX_RING
];
771 pkt
->tag
.type
= VIO_TYPE_DATA
;
772 pkt
->tag
.stype
= VIO_SUBTYPE_INFO
;
773 pkt
->tag
.stype_env
= VIO_DRING_DATA
;
774 pkt
->seq
= dr
->rcv_nxt
;
775 pkt
->start_idx
= vio_dring_next(dr
, port
->napi_stop_idx
);
779 err
= ldc_read(vio
->lp
, &msgbuf
, sizeof(msgbuf
));
780 if (unlikely(err
< 0)) {
781 if (err
== -ECONNRESET
)
787 viodbg(DATA
, "TAG [%02x:%02x:%04x:%08x]\n",
790 msgbuf
.tag
.stype_env
,
792 err
= vio_validate_sid(vio
, &msgbuf
.tag
);
796 if (likely(msgbuf
.tag
.type
== VIO_TYPE_DATA
)) {
797 if (msgbuf
.tag
.stype
== VIO_SUBTYPE_INFO
) {
798 if (!port_is_up(port
)) {
799 /* failures like handshake_failure()
800 * may have cleaned up dring, but
801 * NAPI polling may bring us here.
806 err
= vnet_rx(port
, &msgbuf
, &npkts
, budget
);
811 } else if (msgbuf
.tag
.stype
== VIO_SUBTYPE_ACK
) {
812 err
= vnet_ack(port
, &msgbuf
);
815 } else if (msgbuf
.tag
.stype
== VIO_SUBTYPE_NACK
) {
816 err
= vnet_nack(port
, &msgbuf
);
818 } else if (msgbuf
.tag
.type
== VIO_TYPE_CTRL
) {
819 if (msgbuf
.tag
.stype_env
== VNET_MCAST_INFO
)
820 err
= handle_mcast(port
, &msgbuf
);
822 err
= vio_control_pkt_engine(vio
, &msgbuf
);
826 err
= vnet_handle_unknown(port
, &msgbuf
);
828 if (err
== -ECONNRESET
)
831 if (unlikely(tx_wakeup
&& err
!= -ECONNRESET
))
832 maybe_tx_wakeup(port
);
836 static int vnet_poll(struct napi_struct
*napi
, int budget
)
838 struct vnet_port
*port
= container_of(napi
, struct vnet_port
, napi
);
839 struct vio_driver_state
*vio
= &port
->vio
;
840 int processed
= vnet_event_napi(port
, budget
);
842 if (processed
< budget
) {
844 port
->rx_event
&= ~LDC_EVENT_DATA_READY
;
845 vio_set_intr(vio
->vdev
->rx_ino
, HV_INTR_ENABLED
);
850 static void vnet_event(void *arg
, int event
)
852 struct vnet_port
*port
= arg
;
853 struct vio_driver_state
*vio
= &port
->vio
;
855 port
->rx_event
|= event
;
856 vio_set_intr(vio
->vdev
->rx_ino
, HV_INTR_DISABLED
);
857 napi_schedule(&port
->napi
);
861 static int __vnet_tx_trigger(struct vnet_port
*port
, u32 start
)
863 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
864 struct vio_dring_data hdr
= {
866 .type
= VIO_TYPE_DATA
,
867 .stype
= VIO_SUBTYPE_INFO
,
868 .stype_env
= VIO_DRING_DATA
,
869 .sid
= vio_send_sid(&port
->vio
),
871 .dring_ident
= dr
->ident
,
879 err
= vnet_send_ack(port
,
880 &port
->vio
.drings
[VIO_DRIVER_RX_RING
],
881 port
->stop_rx_idx
, -1,
887 hdr
.seq
= dr
->snd_nxt
;
890 err
= vio_ldc_send(&port
->vio
, &hdr
, sizeof(hdr
));
896 if ((delay
<<= 1) > 128)
898 if (retries
++ > VNET_MAX_RETRIES
)
900 } while (err
== -EAGAIN
);
905 struct vnet_port
*__tx_port_find(struct vnet
*vp
, struct sk_buff
*skb
)
907 unsigned int hash
= vnet_hashfn(skb
->data
);
908 struct hlist_head
*hp
= &vp
->port_hash
[hash
];
909 struct vnet_port
*port
;
911 hlist_for_each_entry_rcu(port
, hp
, hash
) {
912 if (!port_is_up(port
))
914 if (ether_addr_equal(port
->raddr
, skb
->data
))
917 list_for_each_entry_rcu(port
, &vp
->port_list
, list
) {
918 if (!port
->switch_port
)
920 if (!port_is_up(port
))
927 static struct sk_buff
*vnet_clean_tx_ring(struct vnet_port
*port
,
930 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
931 struct sk_buff
*skb
= NULL
;
937 for (i
= 0; i
< VNET_TX_RING_SIZE
; ++i
) {
938 struct vio_net_desc
*d
;
942 txi
= VNET_TX_RING_SIZE
-1;
944 d
= vio_dring_entry(dr
, txi
);
946 if (d
->hdr
.state
== VIO_DESC_READY
) {
950 if (port
->tx_bufs
[txi
].skb
) {
951 if (d
->hdr
.state
!= VIO_DESC_DONE
)
952 pr_notice("invalid ring buffer state %d\n",
954 BUG_ON(port
->tx_bufs
[txi
].skb
->next
);
956 port
->tx_bufs
[txi
].skb
->next
= skb
;
957 skb
= port
->tx_bufs
[txi
].skb
;
958 port
->tx_bufs
[txi
].skb
= NULL
;
960 ldc_unmap(port
->vio
.lp
,
961 port
->tx_bufs
[txi
].cookies
,
962 port
->tx_bufs
[txi
].ncookies
);
963 } else if (d
->hdr
.state
== VIO_DESC_FREE
)
965 d
->hdr
.state
= VIO_DESC_FREE
;
970 static inline void vnet_free_skbs(struct sk_buff
*skb
)
972 struct sk_buff
*next
;
982 static void vnet_clean_timer_expire(unsigned long port0
)
984 struct vnet_port
*port
= (struct vnet_port
*)port0
;
985 struct sk_buff
*freeskbs
;
988 netif_tx_lock(port
->vp
->dev
);
989 freeskbs
= vnet_clean_tx_ring(port
, &pending
);
990 netif_tx_unlock(port
->vp
->dev
);
992 vnet_free_skbs(freeskbs
);
995 (void)mod_timer(&port
->clean_timer
,
996 jiffies
+ VNET_CLEAN_TIMEOUT
);
998 del_timer(&port
->clean_timer
);
1001 static inline int vnet_skb_map(struct ldc_channel
*lp
, struct sk_buff
*skb
,
1002 struct ldc_trans_cookie
*cookies
, int ncookies
,
1003 unsigned int map_perm
)
1005 int i
, nc
, err
, blen
;
1008 blen
= skb_headlen(skb
);
1009 if (blen
< ETH_ZLEN
)
1011 blen
+= VNET_PACKET_SKIP
;
1012 blen
+= 8 - (blen
& 7);
1014 err
= ldc_map_single(lp
, skb
->data
-VNET_PACKET_SKIP
, blen
, cookies
,
1015 ncookies
, map_perm
);
1020 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
1021 skb_frag_t
*f
= &skb_shinfo(skb
)->frags
[i
];
1024 if (nc
< ncookies
) {
1025 vaddr
= kmap_atomic(skb_frag_page(f
));
1026 blen
= skb_frag_size(f
);
1027 blen
+= 8 - (blen
& 7);
1028 err
= ldc_map_single(lp
, vaddr
+ f
->page_offset
,
1029 blen
, cookies
+ nc
, ncookies
- nc
,
1031 kunmap_atomic(vaddr
);
1037 ldc_unmap(lp
, cookies
, nc
);
1045 static inline struct sk_buff
*vnet_skb_shape(struct sk_buff
*skb
, int ncookies
)
1047 struct sk_buff
*nskb
;
1048 int i
, len
, pad
, docopy
;
1052 if (len
< ETH_ZLEN
) {
1053 pad
+= ETH_ZLEN
- skb
->len
;
1056 len
+= VNET_PACKET_SKIP
;
1057 pad
+= 8 - (len
& 7);
1059 /* make sure we have enough cookies and alignment in every frag */
1060 docopy
= skb_shinfo(skb
)->nr_frags
>= ncookies
;
1061 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
1062 skb_frag_t
*f
= &skb_shinfo(skb
)->frags
[i
];
1064 docopy
|= f
->page_offset
& 7;
1066 if (((unsigned long)skb
->data
& 7) != VNET_PACKET_SKIP
||
1067 skb_tailroom(skb
) < pad
||
1068 skb_headroom(skb
) < VNET_PACKET_SKIP
|| docopy
) {
1069 int start
= 0, offset
;
1072 len
= skb
->len
> ETH_ZLEN
? skb
->len
: ETH_ZLEN
;
1073 nskb
= alloc_and_align_skb(skb
->dev
, len
);
1078 skb_reserve(nskb
, VNET_PACKET_SKIP
);
1080 nskb
->protocol
= skb
->protocol
;
1081 offset
= skb_mac_header(skb
) - skb
->data
;
1082 skb_set_mac_header(nskb
, offset
);
1083 offset
= skb_network_header(skb
) - skb
->data
;
1084 skb_set_network_header(nskb
, offset
);
1085 offset
= skb_transport_header(skb
) - skb
->data
;
1086 skb_set_transport_header(nskb
, offset
);
1089 nskb
->csum_offset
= skb
->csum_offset
;
1090 nskb
->ip_summed
= skb
->ip_summed
;
1092 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
1093 start
= skb_checksum_start_offset(skb
);
1095 struct iphdr
*iph
= ip_hdr(nskb
);
1096 int offset
= start
+ nskb
->csum_offset
;
1098 if (skb_copy_bits(skb
, 0, nskb
->data
, start
)) {
1099 dev_kfree_skb(nskb
);
1103 *(__sum16
*)(skb
->data
+ offset
) = 0;
1104 csum
= skb_copy_and_csum_bits(skb
, start
,
1106 skb
->len
- start
, 0);
1107 if (iph
->protocol
== IPPROTO_TCP
||
1108 iph
->protocol
== IPPROTO_UDP
) {
1109 csum
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
1111 iph
->protocol
, csum
);
1113 *(__sum16
*)(nskb
->data
+ offset
) = csum
;
1115 nskb
->ip_summed
= CHECKSUM_NONE
;
1116 } else if (skb_copy_bits(skb
, 0, nskb
->data
, skb
->len
)) {
1117 dev_kfree_skb(nskb
);
1121 (void)skb_put(nskb
, skb
->len
);
1122 if (skb_is_gso(skb
)) {
1123 skb_shinfo(nskb
)->gso_size
= skb_shinfo(skb
)->gso_size
;
1124 skb_shinfo(nskb
)->gso_type
= skb_shinfo(skb
)->gso_type
;
1126 nskb
->queue_mapping
= skb
->queue_mapping
;
1134 vnet_select_queue(struct net_device
*dev
, struct sk_buff
*skb
,
1135 void *accel_priv
, select_queue_fallback_t fallback
)
1137 struct vnet
*vp
= netdev_priv(dev
);
1138 struct vnet_port
*port
= __tx_port_find(vp
, skb
);
1142 return port
->q_index
;
1145 static int vnet_start_xmit(struct sk_buff
*skb
, struct net_device
*dev
);
1147 static int vnet_handle_offloads(struct vnet_port
*port
, struct sk_buff
*skb
)
1149 struct net_device
*dev
= port
->vp
->dev
;
1150 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1151 struct sk_buff
*segs
;
1152 int maclen
, datalen
;
1154 int gso_size
, gso_type
, gso_segs
;
1155 int hlen
= skb_transport_header(skb
) - skb_mac_header(skb
);
1156 int proto
= IPPROTO_IP
;
1158 if (skb
->protocol
== htons(ETH_P_IP
))
1159 proto
= ip_hdr(skb
)->protocol
;
1160 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1161 proto
= ipv6_hdr(skb
)->nexthdr
;
1163 if (proto
== IPPROTO_TCP
)
1164 hlen
+= tcp_hdr(skb
)->doff
* 4;
1165 else if (proto
== IPPROTO_UDP
)
1166 hlen
+= sizeof(struct udphdr
);
1168 pr_err("vnet_handle_offloads GSO with unknown transport "
1169 "protocol %d tproto %d\n", skb
->protocol
, proto
);
1170 hlen
= 128; /* XXX */
1172 datalen
= port
->tsolen
- hlen
;
1174 gso_size
= skb_shinfo(skb
)->gso_size
;
1175 gso_type
= skb_shinfo(skb
)->gso_type
;
1176 gso_segs
= skb_shinfo(skb
)->gso_segs
;
1178 if (port
->tso
&& gso_size
< datalen
)
1179 gso_segs
= DIV_ROUND_UP(skb
->len
- hlen
, datalen
);
1181 if (unlikely(vnet_tx_dring_avail(dr
) < gso_segs
)) {
1182 struct netdev_queue
*txq
;
1184 txq
= netdev_get_tx_queue(dev
, port
->q_index
);
1185 netif_tx_stop_queue(txq
);
1186 if (vnet_tx_dring_avail(dr
) < skb_shinfo(skb
)->gso_segs
)
1187 return NETDEV_TX_BUSY
;
1188 netif_tx_wake_queue(txq
);
1191 maclen
= skb_network_header(skb
) - skb_mac_header(skb
);
1192 skb_pull(skb
, maclen
);
1194 if (port
->tso
&& gso_size
< datalen
) {
1195 /* segment to TSO size */
1196 skb_shinfo(skb
)->gso_size
= datalen
;
1197 skb_shinfo(skb
)->gso_segs
= gso_segs
;
1199 segs
= skb_gso_segment(skb
, dev
->features
& ~NETIF_F_TSO
);
1201 /* restore gso_size & gso_segs */
1202 skb_shinfo(skb
)->gso_size
= gso_size
;
1203 skb_shinfo(skb
)->gso_segs
= DIV_ROUND_UP(skb
->len
- hlen
,
1206 segs
= skb_gso_segment(skb
, dev
->features
& ~NETIF_F_TSO
);
1208 dev
->stats
.tx_dropped
++;
1209 dev_kfree_skb_any(skb
);
1210 return NETDEV_TX_OK
;
1213 skb_push(skb
, maclen
);
1214 skb_reset_mac_header(skb
);
1218 struct sk_buff
*curr
= segs
;
1222 if (port
->tso
&& curr
->len
> dev
->mtu
) {
1223 skb_shinfo(curr
)->gso_size
= gso_size
;
1224 skb_shinfo(curr
)->gso_type
= gso_type
;
1225 skb_shinfo(curr
)->gso_segs
=
1226 DIV_ROUND_UP(curr
->len
- hlen
, gso_size
);
1228 skb_shinfo(curr
)->gso_size
= 0;
1230 skb_push(curr
, maclen
);
1231 skb_reset_mac_header(curr
);
1232 memcpy(skb_mac_header(curr
), skb_mac_header(skb
),
1234 curr
->csum_start
= skb_transport_header(curr
) - curr
->head
;
1235 if (ip_hdr(curr
)->protocol
== IPPROTO_TCP
)
1236 curr
->csum_offset
= offsetof(struct tcphdr
, check
);
1237 else if (ip_hdr(curr
)->protocol
== IPPROTO_UDP
)
1238 curr
->csum_offset
= offsetof(struct udphdr
, check
);
1240 if (!(status
& NETDEV_TX_MASK
))
1241 status
= vnet_start_xmit(curr
, dev
);
1242 if (status
& NETDEV_TX_MASK
)
1243 dev_kfree_skb_any(curr
);
1246 if (!(status
& NETDEV_TX_MASK
))
1247 dev_kfree_skb_any(skb
);
1251 static int vnet_start_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
1253 struct vnet
*vp
= netdev_priv(dev
);
1254 struct vnet_port
*port
= NULL
;
1255 struct vio_dring_state
*dr
;
1256 struct vio_net_desc
*d
;
1258 struct sk_buff
*freeskbs
= NULL
;
1260 unsigned pending
= 0;
1261 struct netdev_queue
*txq
;
1264 port
= __tx_port_find(vp
, skb
);
1265 if (unlikely(!port
)) {
1270 if (skb_is_gso(skb
) && skb
->len
> port
->tsolen
) {
1271 err
= vnet_handle_offloads(port
, skb
);
1276 if (!skb_is_gso(skb
) && skb
->len
> port
->rmtu
) {
1277 unsigned long localmtu
= port
->rmtu
- ETH_HLEN
;
1279 if (vio_version_after_eq(&port
->vio
, 1, 3))
1280 localmtu
-= VLAN_HLEN
;
1282 if (skb
->protocol
== htons(ETH_P_IP
)) {
1284 struct rtable
*rt
= NULL
;
1286 memset(&fl4
, 0, sizeof(fl4
));
1287 fl4
.flowi4_oif
= dev
->ifindex
;
1288 fl4
.flowi4_tos
= RT_TOS(ip_hdr(skb
)->tos
);
1289 fl4
.daddr
= ip_hdr(skb
)->daddr
;
1290 fl4
.saddr
= ip_hdr(skb
)->saddr
;
1292 rt
= ip_route_output_key(dev_net(dev
), &fl4
);
1295 skb_dst_set(skb
, &rt
->dst
);
1296 icmp_send(skb
, ICMP_DEST_UNREACH
,
1301 #if IS_ENABLED(CONFIG_IPV6)
1302 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1303 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, localmtu
);
1308 skb
= vnet_skb_shape(skb
, 2);
1313 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
1316 dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1317 i
= skb_get_queue_mapping(skb
);
1318 txq
= netdev_get_tx_queue(dev
, i
);
1319 if (unlikely(vnet_tx_dring_avail(dr
) < 1)) {
1320 if (!netif_tx_queue_stopped(txq
)) {
1321 netif_tx_stop_queue(txq
);
1323 /* This is a hard error, log it. */
1324 netdev_err(dev
, "BUG! Tx Ring full when queue awake!\n");
1325 dev
->stats
.tx_errors
++;
1328 return NETDEV_TX_BUSY
;
1331 d
= vio_dring_cur(dr
);
1335 freeskbs
= vnet_clean_tx_ring(port
, &pending
);
1337 BUG_ON(port
->tx_bufs
[txi
].skb
);
1343 err
= vnet_skb_map(port
->vio
.lp
, skb
, port
->tx_bufs
[txi
].cookies
, 2,
1344 (LDC_MAP_SHADOW
| LDC_MAP_DIRECT
| LDC_MAP_RW
));
1346 netdev_info(dev
, "tx buffer map error %d\n", err
);
1350 port
->tx_bufs
[txi
].skb
= skb
;
1352 port
->tx_bufs
[txi
].ncookies
= err
;
1354 /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
1355 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
1356 * the protocol itself does not require it as long as the peer
1357 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
1359 * An ACK for every packet in the ring is expensive as the
1360 * sending of LDC messages is slow and affects performance.
1362 d
->hdr
.ack
= VIO_ACK_DISABLE
;
1364 d
->ncookies
= port
->tx_bufs
[txi
].ncookies
;
1365 for (i
= 0; i
< d
->ncookies
; i
++)
1366 d
->cookies
[i
] = port
->tx_bufs
[txi
].cookies
[i
];
1367 if (vio_version_after_eq(&port
->vio
, 1, 7)) {
1368 struct vio_net_dext
*dext
= vio_net_ext(d
);
1370 memset(dext
, 0, sizeof(*dext
));
1371 if (skb_is_gso(port
->tx_bufs
[txi
].skb
)) {
1372 dext
->ipv4_lso_mss
= skb_shinfo(port
->tx_bufs
[txi
].skb
)
1374 dext
->flags
|= VNET_PKT_IPV4_LSO
;
1376 if (vio_version_after_eq(&port
->vio
, 1, 8) &&
1377 !port
->switch_port
) {
1378 dext
->flags
|= VNET_PKT_HCK_IPV4_HDRCKSUM_OK
;
1379 dext
->flags
|= VNET_PKT_HCK_FULLCKSUM_OK
;
1383 /* This has to be a non-SMP write barrier because we are writing
1384 * to memory which is shared with the peer LDOM.
1388 d
->hdr
.state
= VIO_DESC_READY
;
1390 /* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
1391 * to notify the consumer that some descriptors are READY.
1392 * After that "start" trigger, no additional triggers are needed until
1393 * a DRING_STOPPED is received from the consumer. The dr->cons field
1394 * (set up by vnet_ack()) has the value of the next dring index
1395 * that has not yet been ack-ed. We send a "start" trigger here
1396 * if, and only if, start_cons is true (reset it afterward). Conversely,
1397 * vnet_ack() should check if the dring corresponding to cons
1398 * is marked READY, but start_cons was false.
1399 * If so, vnet_ack() should send out the missed "start" trigger.
1401 * Note that the wmb() above makes sure the cookies et al. are
1402 * not globally visible before the VIO_DESC_READY, and that the
1403 * stores are ordered correctly by the compiler. The consumer will
1404 * not proceed until the VIO_DESC_READY is visible assuring that
1405 * the consumer does not observe anything related to descriptors
1406 * out of order. The HV trap from the LDC start trigger is the
1407 * producer to consumer announcement that work is available to the
1410 if (!port
->start_cons
)
1411 goto ldc_start_done
; /* previous trigger suffices */
1413 err
= __vnet_tx_trigger(port
, dr
->cons
);
1414 if (unlikely(err
< 0)) {
1415 netdev_info(dev
, "TX trigger error %d\n", err
);
1416 d
->hdr
.state
= VIO_DESC_FREE
;
1417 dev
->stats
.tx_carrier_errors
++;
1422 port
->start_cons
= false;
1424 dev
->stats
.tx_packets
++;
1425 dev
->stats
.tx_bytes
+= port
->tx_bufs
[txi
].skb
->len
;
1427 dr
->prod
= (dr
->prod
+ 1) & (VNET_TX_RING_SIZE
- 1);
1428 if (unlikely(vnet_tx_dring_avail(dr
) < 1)) {
1429 netif_tx_stop_queue(txq
);
1430 if (vnet_tx_dring_avail(dr
) > VNET_TX_WAKEUP_THRESH(dr
))
1431 netif_tx_wake_queue(txq
);
1434 (void)mod_timer(&port
->clean_timer
, jiffies
+ VNET_CLEAN_TIMEOUT
);
1437 vnet_free_skbs(freeskbs
);
1439 return NETDEV_TX_OK
;
1443 (void)mod_timer(&port
->clean_timer
,
1444 jiffies
+ VNET_CLEAN_TIMEOUT
);
1446 del_timer(&port
->clean_timer
);
1451 vnet_free_skbs(freeskbs
);
1452 dev
->stats
.tx_dropped
++;
1453 return NETDEV_TX_OK
;
/* Transmit-timeout hook, installed as .ndo_tx_timeout in vnet_ops.
 * Currently a stub: nothing is done when it is called.
 */
static void vnet_tx_timeout(struct net_device *dev)
{
	/* XXX Implement me XXX */
}
/* vnet_open: installed as .ndo_open in vnet_ops.
 * Visible steps: mark the carrier on, then start all TX queues of dev.
 * NOTE(review): this listing drops brace-only lines and some statements
 * (the embedded numbering skips), so the return statement is not shown.
 */
1461 static int vnet_open(struct net_device
*dev
)
1463 netif_carrier_on(dev
);
1464 netif_tx_start_all_queues(dev
);
/* vnet_close: installed as .ndo_stop in vnet_ops.
 * Visible steps: stop all TX queues of dev, then mark the carrier off
 * (the reverse order of vnet_open).
 * NOTE(review): the return statement is not shown in this listing.
 */
1469 static int vnet_close(struct net_device
*dev
)
1471 netif_tx_stop_all_queues(dev
);
1472 netif_carrier_off(dev
);
/* __vnet_mc_find: walk the singly linked vp->mcast_list (via ->next) and
 * compare each entry's addr with @addr using ether_addr_equal().
 * NOTE(review): the return statements are not shown in this listing;
 * presumably the matching entry is returned, or NULL if none matches.
 */
1477 static struct vnet_mcast_entry
*__vnet_mc_find(struct vnet
*vp
, u8
*addr
)
1479 struct vnet_mcast_entry
*m
;
1481 for (m
= vp
->mcast_list
; m
; m
= m
->next
) {
1482 if (ether_addr_equal(m
->addr
, addr
))
/* __update_mc_list: for every multicast address configured on @dev, look
 * it up in vp->mcast_list with __vnet_mc_find(); the visible code also
 * allocates a zeroed entry with GFP_ATOMIC, copies the ETH_ALEN-byte
 * address into it and links it in front of the current list head.
 * NOTE(review): the conditions deciding when an entry is allocated and
 * the allocation-failure handling are not shown in this listing.
 */
1488 static void __update_mc_list(struct vnet
*vp
, struct net_device
*dev
)
1490 struct netdev_hw_addr
*ha
;
1492 netdev_for_each_mc_addr(ha
, dev
) {
1493 struct vnet_mcast_entry
*m
;
1495 m
= __vnet_mc_find(vp
, ha
->addr
);
1502 m
= kzalloc(sizeof(*m
), GFP_ATOMIC
);
1505 memcpy(m
->addr
, ha
->addr
, ETH_ALEN
);
1508 m
->next
= vp
->mcast_list
;
/* __send_mc_list: send multicast address information to @port.
 * A vio_net_mcast_info message is zeroed and tagged as
 * VIO_TYPE_CTRL / VIO_SUBTYPE_INFO / VNET_MCAST_INFO with the port's
 * send sid.  The list vp->mcast_list is then traversed twice: once with
 * a plain for-loop and once through a pointer-to-pointer (pp).  In both
 * passes addresses are packed ETH_ALEN bytes apart into info.mcast_addr
 * and the message is sent with vio_ldc_send() whenever VNET_NUM_MCAST
 * addresses have accumulated, plus a send after each loop.
 * The return value of vio_ldc_send() is deliberately discarded.
 * NOTE(review): the per-entry filter conditions, the memcpy source
 * arguments and the code between the two passes are not shown in this
 * listing, so what distinguishes the two passes cannot be confirmed here.
 */
1514 static void __send_mc_list(struct vnet
*vp
, struct vnet_port
*port
)
1516 struct vio_net_mcast_info info
;
1517 struct vnet_mcast_entry
*m
, **pp
;
1520 memset(&info
, 0, sizeof(info
));
1522 info
.tag
.type
= VIO_TYPE_CTRL
;
1523 info
.tag
.stype
= VIO_SUBTYPE_INFO
;
1524 info
.tag
.stype_env
= VNET_MCAST_INFO
;
1525 info
.tag
.sid
= vio_send_sid(&port
->vio
);
1529 for (m
= vp
->mcast_list
; m
; m
= m
->next
) {
1533 memcpy(&info
.mcast_addr
[n_addrs
* ETH_ALEN
],
1535 if (++n_addrs
== VNET_NUM_MCAST
) {
1536 info
.count
= n_addrs
;
1538 (void) vio_ldc_send(&port
->vio
, &info
,
1544 info
.count
= n_addrs
;
1545 (void) vio_ldc_send(&port
->vio
, &info
, sizeof(info
));
1551 pp
= &vp
->mcast_list
;
1552 while ((m
= *pp
) != NULL
) {
1559 memcpy(&info
.mcast_addr
[n_addrs
* ETH_ALEN
],
1561 if (++n_addrs
== VNET_NUM_MCAST
) {
1562 info
.count
= n_addrs
;
1563 (void) vio_ldc_send(&port
->vio
, &info
,
1572 info
.count
= n_addrs
;
1573 (void) vio_ldc_send(&port
->vio
, &info
, sizeof(info
));
/* vnet_set_rx_mode: installed as .ndo_set_rx_mode in vnet_ops.
 * Iterates vp->port_list with list_for_each_entry_rcu(); for a port with
 * switch_port set it refreshes the multicast list (__update_mc_list) and
 * sends it to that port (__send_mc_list).
 * NOTE(review): the RCU read-side locking and any early loop exit are
 * not shown in this listing.
 */
1577 static void vnet_set_rx_mode(struct net_device
*dev
)
1579 struct vnet
*vp
= netdev_priv(dev
);
1580 struct vnet_port
*port
;
1583 list_for_each_entry_rcu(port
, &vp
->port_list
, list
) {
1585 if (port
->switch_port
) {
1586 __update_mc_list(vp
, dev
);
1587 __send_mc_list(vp
, port
);
/* vnet_change_mtu: installed as .ndo_change_mtu in vnet_ops.
 * The visible check tests whether new_mtu lies outside 68..65535.
 * NOTE(review): the statements following the check (presumably an error
 * return and the dev->mtu update) are not shown in this listing.
 */
1594 static int vnet_change_mtu(struct net_device
*dev
, int new_mtu
)
1596 if (new_mtu
< 68 || new_mtu
> 65535)
/* vnet_set_mac_addr: installed as .ndo_set_mac_address in vnet_ops.
 * NOTE(review): only the signature survives in this listing; the body
 * is not shown, so its behaviour cannot be documented from here.
 */
1603 static int vnet_set_mac_addr(struct net_device
*dev
, void *p
)
1608 static void vnet_get_drvinfo(struct net_device
*dev
,
1609 struct ethtool_drvinfo
*info
)
1611 strlcpy(info
->driver
, DRV_MODULE_NAME
, sizeof(info
->driver
));
1612 strlcpy(info
->version
, DRV_MODULE_VERSION
, sizeof(info
->version
));
1615 static u32
vnet_get_msglevel(struct net_device
*dev
)
1617 struct vnet
*vp
= netdev_priv(dev
);
1618 return vp
->msg_enable
;
1621 static void vnet_set_msglevel(struct net_device
*dev
, u32 value
)
1623 struct vnet
*vp
= netdev_priv(dev
);
1624 vp
->msg_enable
= value
;
1627 static const struct ethtool_ops vnet_ethtool_ops
= {
1628 .get_drvinfo
= vnet_get_drvinfo
,
1629 .get_msglevel
= vnet_get_msglevel
,
1630 .set_msglevel
= vnet_set_msglevel
,
1631 .get_link
= ethtool_op_get_link
,
/* vnet_port_free_tx_bufs: tear down the TX descriptor ring of @port.
 * Visible steps: take the VIO_DRIVER_TX_RING dring; test dr->base
 * against NULL; for each of the VNET_TX_RING_SIZE slots, unmap the
 * slot's LDC cookies, clear the slot's skb pointer and mark the
 * descriptor VIO_DESC_FREE; then release the exported ring with
 * ldc_free_exp_dring() and reset dr->num_entries to 0.
 * NOTE(review): the action taken when dr->base is NULL, the per-slot
 * skip condition and the freeing of the skb itself are not shown in
 * this listing.
 */
1634 static void vnet_port_free_tx_bufs(struct vnet_port
*port
)
1636 struct vio_dring_state
*dr
;
1639 dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1641 if (dr
->base
== NULL
)
1644 for (i
= 0; i
< VNET_TX_RING_SIZE
; i
++) {
1645 struct vio_net_desc
*d
;
1646 void *skb
= port
->tx_bufs
[i
].skb
;
1651 d
= vio_dring_entry(dr
, i
);
1653 ldc_unmap(port
->vio
.lp
,
1654 port
->tx_bufs
[i
].cookies
,
1655 port
->tx_bufs
[i
].ncookies
);
1657 port
->tx_bufs
[i
].skb
= NULL
;
1658 d
->hdr
.state
= VIO_DESC_FREE
;
1660 ldc_free_exp_dring(port
->vio
.lp
, dr
->base
,
1661 (dr
->entry_size
* dr
->num_entries
),
1662 dr
->cookies
, dr
->ncookies
);
1665 dr
->num_entries
= 0;
/* vnet_port_reset: visible steps are to cancel the port's clean_timer
 * (del_timer, i.e. without waiting for a running handler) and to free
 * the TX ring buffers via vnet_port_free_tx_bufs().
 * NOTE(review): the embedded numbering jumps from 1673 to 1679, so any
 * further per-port state reset is not shown in this listing.
 */
1670 static void vnet_port_reset(struct vnet_port
*port
)
1672 del_timer(&port
->clean_timer
);
1673 vnet_port_free_tx_bufs(port
);
/* vnet_port_alloc_tx_ring: allocate and initialise the TX descriptor
 * ring of @port.
 * Entry size is one vio_net_desc plus two ldc_trans_cookie, extended by
 * a vio_net_dext when the negotiated VIO version is at least 1.7; the
 * ring holds VNET_TX_RING_SIZE entries and is allocated/exported with
 * ldc_alloc_exp_dring().  On success the dring bookkeeping is set
 * (entry_size, num_entries, prod = cons = 0, pending, ncookies),
 * start_cons is set so that an initial trigger is sent, and every
 * descriptor is marked VIO_DESC_FREE.
 * An IS_ERR() result from the allocation is converted with PTR_ERR();
 * the visible error path calls vnet_port_free_tx_bufs().
 * NOTE(review): the remaining arguments of ldc_alloc_exp_dring(), the
 * assignment of dr->base and the return statements are not shown in
 * this listing.
 */
1679 static int vnet_port_alloc_tx_ring(struct vnet_port
*port
)
1681 struct vio_dring_state
*dr
;
1682 unsigned long len
, elen
;
1683 int i
, err
, ncookies
;
1686 dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1688 elen
= sizeof(struct vio_net_desc
) +
1689 sizeof(struct ldc_trans_cookie
) * 2;
1690 if (vio_version_after_eq(&port
->vio
, 1, 7))
1691 elen
+= sizeof(struct vio_net_dext
);
1692 len
= VNET_TX_RING_SIZE
* elen
;
1694 ncookies
= VIO_MAX_RING_COOKIES
;
1695 dring
= ldc_alloc_exp_dring(port
->vio
.lp
, len
,
1696 dr
->cookies
, &ncookies
,
1700 if (IS_ERR(dring
)) {
1701 err
= PTR_ERR(dring
);
1706 dr
->entry_size
= elen
;
1707 dr
->num_entries
= VNET_TX_RING_SIZE
;
1708 dr
->prod
= dr
->cons
= 0;
1709 port
->start_cons
= true; /* need an initial trigger */
1710 dr
->pending
= VNET_TX_RING_SIZE
;
1711 dr
->ncookies
= ncookies
;
1713 for (i
= 0; i
< VNET_TX_RING_SIZE
; ++i
) {
1714 struct vio_net_desc
*d
;
1716 d
= vio_dring_entry(dr
, i
);
1717 d
->hdr
.state
= VIO_DESC_FREE
;
1722 vnet_port_free_tx_bufs(port
);
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Netpoll hook, installed as .ndo_poll_controller in vnet_ops.
 * With vp->lock held (interrupt state saved and restored around it),
 * schedule NAPI on the first port of vp->port_list, if the list is not
 * empty.
 */
static void vnet_poll_controller(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	if (!list_empty(&vp->port_list)) {
		port = list_entry(vp->port_list.next, struct vnet_port, list);
		napi_schedule(&port->napi);
	}
	spin_unlock_irqrestore(&vp->lock, flags);
}
#endif
/* Global list of vnet instances and the mutex taken around accesses to
 * it (see vnet_find_or_create() and vnet_cleanup()).
 */
static LIST_HEAD(vnet_list);
static DEFINE_MUTEX(vnet_list_mutex);
1745 static const struct net_device_ops vnet_ops
= {
1746 .ndo_open
= vnet_open
,
1747 .ndo_stop
= vnet_close
,
1748 .ndo_set_rx_mode
= vnet_set_rx_mode
,
1749 .ndo_set_mac_address
= vnet_set_mac_addr
,
1750 .ndo_validate_addr
= eth_validate_addr
,
1751 .ndo_tx_timeout
= vnet_tx_timeout
,
1752 .ndo_change_mtu
= vnet_change_mtu
,
1753 .ndo_start_xmit
= vnet_start_xmit
,
1754 .ndo_select_queue
= vnet_select_queue
,
1755 #ifdef CONFIG_NET_POLL_CONTROLLER
1756 .ndo_poll_controller
= vnet_poll_controller
,
/* vnet_new: allocate, initialise and register a vnet net_device for
 * the MAC address *@local_mac.
 * Visible steps: allocate an ethernet device with VNET_MAX_TXQS TX
 * queues and one RX queue; reserve headroom (VNET_PACKET_SKIP + 8) and
 * tailroom (8); derive the six dev_addr bytes from the low 48 bits of
 * *local_mac, most significant byte first; initialise the lock, port
 * list, port hash buckets and list node; record local_mac; install
 * vnet_ops / vnet_ethtool_ops, the watchdog timeout and the feature
 * flags; register the device and add the vnet to vnet_list.
 * Error results are returned as ERR_PTR() values (-ENOMEM on allocation
 * failure, otherwise the register_netdev() error).
 * NOTE(review): the success return, the error-path cleanup and a few
 * assignments are not shown in this listing.  The list_add() on
 * vnet_list relies on the caller's locking; vnet_find_or_create() calls
 * this function with vnet_list_mutex held.
 */
1760 static struct vnet
*vnet_new(const u64
*local_mac
)
1762 struct net_device
*dev
;
1766 dev
= alloc_etherdev_mqs(sizeof(*vp
), VNET_MAX_TXQS
, 1);
1768 return ERR_PTR(-ENOMEM
);
1769 dev
->needed_headroom
= VNET_PACKET_SKIP
+ 8;
1770 dev
->needed_tailroom
= 8;
1772 for (i
= 0; i
< ETH_ALEN
; i
++)
1773 dev
->dev_addr
[i
] = (*local_mac
>> (5 - i
) * 8) & 0xff;
1775 vp
= netdev_priv(dev
);
1777 spin_lock_init(&vp
->lock
);
1780 INIT_LIST_HEAD(&vp
->port_list
);
1781 for (i
= 0; i
< VNET_PORT_HASH_SIZE
; i
++)
1782 INIT_HLIST_HEAD(&vp
->port_hash
[i
]);
1783 INIT_LIST_HEAD(&vp
->list
);
1784 vp
->local_mac
= *local_mac
;
1786 dev
->netdev_ops
= &vnet_ops
;
1787 dev
->ethtool_ops
= &vnet_ethtool_ops
;
1788 dev
->watchdog_timeo
= VNET_TX_TIMEOUT
;
1790 dev
->hw_features
= NETIF_F_TSO
| NETIF_F_GSO
| NETIF_F_GSO_SOFTWARE
|
1791 NETIF_F_HW_CSUM
| NETIF_F_SG
;
1792 dev
->features
= dev
->hw_features
;
1794 err
= register_netdev(dev
);
1796 pr_err("Cannot register net device, aborting\n");
1797 goto err_out_free_dev
;
1800 netdev_info(dev
, "Sun LDOM vnet %pM\n", dev
->dev_addr
);
1802 list_add(&vp
->list
, &vnet_list
);
1809 return ERR_PTR(err
);
/* vnet_find_or_create: with vnet_list_mutex held, search vnet_list for
 * an instance whose local_mac equals *@local_mac; the visible code also
 * calls vnet_new() for that MAC before dropping the mutex.
 * NOTE(review): the handling of a successful match and the return
 * statement are not shown in this listing; presumably vnet_new() is
 * only called when no existing instance matches.
 */
1812 static struct vnet
*vnet_find_or_create(const u64
*local_mac
)
1814 struct vnet
*iter
, *vp
;
1816 mutex_lock(&vnet_list_mutex
);
1818 list_for_each_entry(iter
, &vnet_list
, list
) {
1819 if (iter
->local_mac
== *local_mac
) {
1825 vp
= vnet_new(local_mac
);
1826 mutex_unlock(&vnet_list_mutex
);
/* vnet_cleanup: with vnet_list_mutex held, repeatedly take the first
 * vnet off vnet_list, assert (BUG_ON) that its port_list is already
 * empty, and unregister its net_device.
 * NOTE(review): the assignment of dev and the release of the device
 * after unregister_netdev() are not shown in this listing.
 */
1831 static void vnet_cleanup(void)
1834 struct net_device
*dev
;
1836 mutex_lock(&vnet_list_mutex
);
1837 while (!list_empty(&vnet_list
)) {
1838 vp
= list_first_entry(&vnet_list
, struct vnet
, list
);
1839 list_del(&vp
->list
);
1841 /* vio_unregister_driver() should have cleaned up port_list */
1842 BUG_ON(!list_empty(&vp
->port_list
));
1843 unregister_netdev(dev
);
1846 mutex_unlock(&vnet_list_mutex
);
/* Machine-description property name read in vnet_find_parent() to get
 * the local MAC address of the parent "network" node.
 */
static const char *local_mac_prop = "local-mac-address";
/* vnet_find_parent: locate (or create) the vnet a port belongs to.
 * Follows the back arcs of port_node in the machine description, checks
 * each arc target's "name" property against "network", and reads the
 * local_mac_prop property from the target.  The visible returns are
 * ERR_PTR(-ENODEV) and the result of vnet_find_or_create(local_mac).
 * NOTE(review): the remaining parameters, the loop-control statements
 * and the condition guarding the -ENODEV return are not shown in this
 * listing; presumably -ENODEV is returned when no MAC was found.
 */
1851 static struct vnet
*vnet_find_parent(struct mdesc_handle
*hp
,
1854 const u64
*local_mac
= NULL
;
1857 mdesc_for_each_arc(a
, hp
, port_node
, MDESC_ARC_TYPE_BACK
) {
1858 u64 target
= mdesc_arc_target(hp
, a
);
1861 name
= mdesc_get_property(hp
, target
, "name", NULL
);
1862 if (!name
|| strcmp(name
, "network"))
1865 local_mac
= mdesc_get_property(hp
, target
,
1866 local_mac_prop
, NULL
);
1871 return ERR_PTR(-ENODEV
);
1873 return vnet_find_or_create(local_mac
);
/* LDC channel configuration passed to vio_ldc_alloc() in
 * vnet_port_probe(): events go to vnet_event() and the channel runs in
 * LDC_MODE_UNRELIABLE.
 * NOTE(review): embedded line 1878 (one initializer) is missing from
 * this listing.
 */
1876 static struct ldc_channel_config vnet_ldc_cfg
= {
1877 .event
= vnet_event
,
1879 .mode
= LDC_MODE_UNRELIABLE
,
1882 static struct vio_driver_ops vnet_vio_ops
= {
1883 .send_attr
= vnet_send_attr
,
1884 .handle_attr
= vnet_handle_attr
,
1885 .handshake_complete
= vnet_handshake_complete
,
1888 static void print_version(void)
1890 printk_once(KERN_INFO
"%s", version
);
/* Machine-description property name read in vnet_port_probe() to get
 * the peer's MAC address.
 * NOTE(review): unlike local_mac_prop this is not static, so the symbol
 * has external linkage; kept as-is in case another unit refers to it.
 */
const char *remote_macaddr_prop = "remote-mac-address";
/* vnet_port_add_txq: visible steps mask a counter n with
 * (VNET_MAX_TXQS - 1) and wake the TX queue of vp->dev selected by
 * port->q_index.
 * NOTE(review): the return type, the declaration and source of n, and
 * the assignment of port->q_index are not shown in this listing.  The
 * mask only works as a modulo if VNET_MAX_TXQS is a power of two (it is
 * defined as 16 at the top of the file).
 */
1896 vnet_port_add_txq(struct vnet_port
*port
)
1898 struct vnet
*vp
= port
->vp
;
1902 n
= n
& (VNET_MAX_TXQS
- 1);
1904 netif_tx_wake_queue(netdev_get_tx_queue(vp
->dev
, port
->q_index
));
/* vnet_port_rm_txq: stop the TX queue of the parent device selected by
 * port->q_index.
 * NOTE(review): the return type and embedded line 1910 are not shown in
 * this listing.
 */
1908 vnet_port_rm_txq(struct vnet_port
*port
)
1911 netif_tx_stop_queue(netdev_get_tx_queue(port
->vp
->dev
, port
->q_index
));
/* vnet_port_probe: VIO probe callback for "vnet-port" devices (see
 * vnet_port_driver).
 * Visible sequence:
 *  - find or create the parent vnet via vnet_find_parent();
 *  - read the remote MAC property and allocate a zeroed vnet_port;
 *  - unpack the six raddr bytes from the low 48 bits of *rmac, most
 *    significant byte first;
 *  - initialise the VIO driver state and allocate the LDC channel;
 *  - add the NAPI context, initialise list/hash nodes, and record
 *    whether the node has a "switch-port" property;
 *  - under vp->lock, link the port into vp->port_list (both a head and
 *    a tail insertion are visible) and into the port hash bucket chosen
 *    by vnet_hashfn(raddr), then attach a TX queue;
 *  - store the port as driver data, log it, set up clean_timer, enable
 *    NAPI and bring the VIO port up.
 * Failures jump to the err_out_* labels.
 * NOTE(review): the error checks, the labels' cleanup code, the
 * condition choosing head vs. tail insertion and the return statements
 * are not shown in this listing; presumably switch ports are inserted
 * at the head.
 */
1914 static int vnet_port_probe(struct vio_dev
*vdev
, const struct vio_device_id
*id
)
1916 struct mdesc_handle
*hp
;
1917 struct vnet_port
*port
;
1918 unsigned long flags
;
1921 int len
, i
, err
, switch_port
;
1927 vp
= vnet_find_parent(hp
, vdev
->mp
);
1929 pr_err("Cannot find port parent vnet\n");
1931 goto err_out_put_mdesc
;
1934 rmac
= mdesc_get_property(hp
, vdev
->mp
, remote_macaddr_prop
, &len
);
1937 pr_err("Port lacks %s property\n", remote_macaddr_prop
);
1938 goto err_out_put_mdesc
;
1941 port
= kzalloc(sizeof(*port
), GFP_KERNEL
);
1944 goto err_out_put_mdesc
;
1946 for (i
= 0; i
< ETH_ALEN
; i
++)
1947 port
->raddr
[i
] = (*rmac
>> (5 - i
) * 8) & 0xff;
1951 err
= vio_driver_init(&port
->vio
, vdev
, VDEV_NETWORK
,
1952 vnet_versions
, ARRAY_SIZE(vnet_versions
),
1953 &vnet_vio_ops
, vp
->dev
->name
);
1955 goto err_out_free_port
;
1957 err
= vio_ldc_alloc(&port
->vio
, &vnet_ldc_cfg
, port
);
1959 goto err_out_free_port
;
1961 netif_napi_add(port
->vp
->dev
, &port
->napi
, vnet_poll
, NAPI_POLL_WEIGHT
);
1963 INIT_HLIST_NODE(&port
->hash
);
1964 INIT_LIST_HEAD(&port
->list
);
1967 if (mdesc_get_property(hp
, vdev
->mp
, "switch-port", NULL
) != NULL
)
1969 port
->switch_port
= switch_port
;
1973 spin_lock_irqsave(&vp
->lock
, flags
);
1975 list_add_rcu(&port
->list
, &vp
->port_list
);
1977 list_add_tail_rcu(&port
->list
, &vp
->port_list
);
1978 hlist_add_head_rcu(&port
->hash
,
1979 &vp
->port_hash
[vnet_hashfn(port
->raddr
)]);
1980 vnet_port_add_txq(port
);
1981 spin_unlock_irqrestore(&vp
->lock
, flags
);
1983 dev_set_drvdata(&vdev
->dev
, port
);
1985 pr_info("%s: PORT ( remote-mac %pM%s )\n",
1986 vp
->dev
->name
, port
->raddr
, switch_port
? " switch-port" : "");
1988 setup_timer(&port
->clean_timer
, vnet_clean_timer_expire
,
1989 (unsigned long)port
);
1991 napi_enable(&port
->napi
);
1992 vio_port_up(&port
->vio
);
/* vnet_port_remove: VIO remove callback (see vnet_port_driver).
 * Visible teardown order: stop the VIO timer synchronously, disable
 * NAPI, unlink the port from the RCU list and hash, stop clean_timer
 * synchronously, detach the TX queue, delete the NAPI context, free the
 * TX ring buffers, free the LDC channel and clear the driver data.
 * NOTE(review): the NULL check on port, any RCU grace-period wait, the
 * freeing of the port structure and the return statement are not shown
 * in this listing.
 */
2006 static int vnet_port_remove(struct vio_dev
*vdev
)
2008 struct vnet_port
*port
= dev_get_drvdata(&vdev
->dev
);
2012 del_timer_sync(&port
->vio
.timer
);
2014 napi_disable(&port
->napi
);
2016 list_del_rcu(&port
->list
);
2017 hlist_del_rcu(&port
->hash
);
2020 del_timer_sync(&port
->clean_timer
);
2021 vnet_port_rm_txq(port
);
2022 netif_napi_del(&port
->napi
);
2023 vnet_port_free_tx_bufs(port
);
2024 vio_ldc_free(&port
->vio
);
2026 dev_set_drvdata(&vdev
->dev
, NULL
);
2034 static const struct vio_device_id vnet_port_match
[] = {
2036 .type
= "vnet-port",
2040 MODULE_DEVICE_TABLE(vio
, vnet_port_match
);
2042 static struct vio_driver vnet_port_driver
= {
2043 .id_table
= vnet_port_match
,
2044 .probe
= vnet_port_probe
,
2045 .remove
= vnet_port_remove
,
2046 .name
= "vnet_port",
2049 static int __init
vnet_init(void)
2051 return vio_register_driver(&vnet_port_driver
);
/* vnet_exit: module exit point; unregisters the vnet port driver from
 * the VIO bus.
 * NOTE(review): embedded line 2057 is missing from this listing;
 * presumably it calls vnet_cleanup(), whose comment expects
 * vio_unregister_driver() to have run first.
 */
2054 static void __exit
vnet_exit(void)
2056 vio_unregister_driver(&vnet_port_driver
);
/* Hook the init/exit functions into the module loader. */
module_init(vnet_init);
module_exit(vnet_exit);