/* sunvnet.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/highmem.h>
#include <linux/if_vlan.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/icmpv6.h>
#endif

#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>

#include <asm/vio.h>
#include <asm/ldc.h>

#include "sunvnet.h"

#define DRV_MODULE_NAME		"sunvnet"
#define DRV_MODULE_VERSION	"1.0"
#define DRV_MODULE_RELDATE	"June 25, 2007"

static char version[] =
	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
MODULE_DESCRIPTION("Sun LDOM virtual network driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_MODULE_VERSION);
#define VNET_MAX_TXQS		16

/* Heuristic for the number of times to exponentially backoff and
 * retry sending an LDC trigger when EAGAIN is encountered
 */
#define	VNET_MAX_RETRIES	10
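/* Illustrative sketch (not driver code) of the backoff pattern the
 * senders below follow: the delay starts at 1us, doubles on each
 * -EAGAIN up to a 128us cap, and the attempt gives up entirely after
 * VNET_MAX_RETRIES doublings:
 *
 *	delay = 1;
 *	do {
 *		err = vio_ldc_send(...);
 *		if (err > 0)
 *			break;
 *		udelay(delay);
 *		if ((delay <<= 1) > 128)
 *			delay = 128;
 *		if (retries++ > VNET_MAX_RETRIES)
 *			break;
 *	} while (err == -EAGAIN);
 */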
static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
static void vnet_port_reset(struct vnet_port *port);
/* Ordered from largest major to lowest */
static struct vio_version vnet_versions[] = {
	{ .major = 1, .minor = 8 },
	{ .major = 1, .minor = 7 },
	{ .major = 1, .minor = 6 },
	{ .major = 1, .minor = 0 },
};
static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
{
	return vio_dring_avail(dr, VNET_TX_RING_SIZE);
}
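/* Assuming the usual vio_dring_avail() ring arithmetic from asm/vio.h
 * (pending - ((prod - cons) & (ring_size - 1))): with prod = 10,
 * cons = 4 and dr->pending equal to VNET_TX_RING_SIZE, six descriptors
 * are in flight and the remainder are still available to fill.
 */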
static int vnet_handle_unknown(struct vnet_port *port, void *arg)
{
	struct vio_msg_tag *pkt = arg;

	pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
	pr_err("Resetting connection\n");

	ldc_disconnect(port->vio.lp);

	return -ECONNRESET;
}
static int vnet_port_alloc_tx_ring(struct vnet_port *port);
static int vnet_send_attr(struct vio_driver_state *vio)
{
	struct vnet_port *port = to_vnet_port(vio);
	struct net_device *dev = port->vp->dev;
	struct vio_net_attr_info pkt;
	int framelen = ETH_FRAME_LEN;
	int i, err;

	err = vnet_port_alloc_tx_ring(to_vnet_port(vio));
	if (err)
		return err;

	memset(&pkt, 0, sizeof(pkt));
	pkt.tag.type = VIO_TYPE_CTRL;
	pkt.tag.stype = VIO_SUBTYPE_INFO;
	pkt.tag.stype_env = VIO_ATTR_INFO;
	pkt.tag.sid = vio_send_sid(vio);
	if (vio_version_before(vio, 1, 2))
		pkt.xfer_mode = VIO_DRING_MODE;
	else
		pkt.xfer_mode = VIO_NEW_DRING_MODE;
	pkt.addr_type = VNET_ADDR_ETHERMAC;
	for (i = 0; i < 6; i++)
		pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
	if (vio_version_after(vio, 1, 3)) {
		if (port->rmtu) {
			port->rmtu = min(VNET_MAXPACKET, port->rmtu);
			pkt.mtu = port->rmtu;
		} else {
			port->rmtu = VNET_MAXPACKET;
			pkt.mtu = port->rmtu;
		}
		if (vio_version_after_eq(vio, 1, 6))
			pkt.options = VIO_TX_DRING;
	} else if (vio_version_before(vio, 1, 3)) {
		pkt.mtu = framelen;
	} else { /* v1.3 */
		pkt.mtu = framelen + VLAN_HLEN;
	}

	if (vio_version_after_eq(vio, 1, 7) && port->tso) {
		pkt.cflags |= VNET_LSO_IPV4_CAPAB;
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		pkt.ipv4_lso_maxlen = port->tsolen;
	}

	pkt.plnk_updt = PHYSLINK_UPDATE_NONE;

	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       "cflags[0x%04x] lso_max[%u]\n",
	       pkt.xfer_mode, pkt.addr_type,
	       (unsigned long long)pkt.addr,
	       pkt.ack_freq, pkt.plnk_updt, pkt.options,
	       (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);

	return vio_ldc_send(vio, &pkt, sizeof(pkt));
}
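/* Worked example for the address packing above, using a hypothetical
 * MAC of 00:14:4f:f8:00:01: each byte is shifted in most-significant
 * byte first ((5 - i) * 8), so pkt.addr ends up as 0x0000144ff8000001.
 */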
static int handle_attr_info(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	struct vnet_port *port = to_vnet_port(vio);
	u64 localmtu;
	u8 xfer_mode;

	viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long)pkt->addr,
	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
	       pkt->ipv4_lso_maxlen);

	pkt->tag.sid = vio_send_sid(vio);

	xfer_mode = pkt->xfer_mode;
	/* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
	if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
		xfer_mode = VIO_NEW_DRING_MODE;

	/* MTU negotiation:
	 *	< v1.3 - ETH_FRAME_LEN exactly
	 *	> v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
	 *			pkt->mtu for ACK
	 *	= v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
	 */
	if (vio_version_before(vio, 1, 3)) {
		localmtu = ETH_FRAME_LEN;
	} else if (vio_version_after(vio, 1, 3)) {
		localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
		localmtu = min(pkt->mtu, localmtu);
		pkt->mtu = localmtu;
	} else { /* v1.3 */
		localmtu = ETH_FRAME_LEN + VLAN_HLEN;
	}
	port->rmtu = localmtu;

	/* LSO negotiation */
	if (vio_version_after_eq(vio, 1, 7))
		port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB);
	else
		port->tso = false;
	if (port->tso) {
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen);
		if (port->tsolen < VNET_MINTSO) {
			port->tso = false;
			port->tsolen = 0;
			pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		}
		pkt->ipv4_lso_maxlen = port->tsolen;
	} else {
		pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		pkt->ipv4_lso_maxlen = 0;
	}

	/* for version >= 1.6, ACK packet mode we support */
	if (vio_version_after_eq(vio, 1, 6)) {
		pkt->xfer_mode = VIO_NEW_DRING_MODE;
		pkt->options = VIO_TX_DRING;
	}

	/* the peer must have negotiated the new dring mode */
	if (!(xfer_mode & VIO_NEW_DRING_MODE) ||
	    pkt->addr_type != VNET_ADDR_ETHERMAC ||
	    pkt->mtu != localmtu) {
		viodbg(HS, "SEND NET ATTR NACK\n");

		pkt->tag.stype = VIO_SUBTYPE_NACK;

		(void) vio_ldc_send(vio, pkt, sizeof(*pkt));

		return -ECONNRESET;
	}

	viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
	       "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
	       "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long)pkt->addr,
	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
	       pkt->ipv4_lso_maxlen);

	pkt->tag.stype = VIO_SUBTYPE_ACK;

	return vio_ldc_send(vio, pkt, sizeof(*pkt));
}
static int handle_attr_ack(struct vio_driver_state *vio,
			   struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR ACK\n");

	return 0;
}

static int handle_attr_nack(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR NACK\n");

	return -ECONNRESET;
}
static int vnet_handle_attr(struct vio_driver_state *vio, void *arg)
{
	struct vio_net_attr_info *pkt = arg;

	switch (pkt->tag.stype) {
	case VIO_SUBTYPE_INFO:
		return handle_attr_info(vio, pkt);

	case VIO_SUBTYPE_ACK:
		return handle_attr_ack(vio, pkt);

	case VIO_SUBTYPE_NACK:
		return handle_attr_nack(vio, pkt);

	default:
		return -ECONNRESET;
	}
}
static void vnet_handshake_complete(struct vio_driver_state *vio)
{
	struct vio_dring_state *dr;

	dr = &vio->drings[VIO_DRIVER_RX_RING];
	dr->snd_nxt = dr->rcv_nxt = 1;

	dr = &vio->drings[VIO_DRIVER_TX_RING];
	dr->snd_nxt = dr->rcv_nxt = 1;
}
/* The hypervisor interface that implements copying to/from imported
 * memory from another domain requires that copies are done to 8-byte
 * aligned buffers, and that the lengths of such copies are also 8-byte
 * multiples.
 *
 * So we align skb->data to an 8-byte multiple and pad-out the data
 * area so we can round the copy length up to the next multiple of
 * 8 for the copy.
 *
 * The transmitter puts the actual start of the packet 6 bytes into
 * the buffer it sends over, so that the IP headers after the ethernet
 * header are aligned properly.  These 6 bytes are not in the descriptor
 * length, they are simply implied.  This offset is represented using
 * the VNET_PACKET_SKIP macro.
 */
static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
					   unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len+VNET_PACKET_SKIP+8+8);
	unsigned long addr, off;

	if (unlikely(!skb))
		return NULL;

	addr = (unsigned long) skb->data;
	off = ((addr + 7UL) & ~7UL) - addr;
	if (off)
		skb_reserve(skb, off);

	return skb;
}
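/* Worked example of the rounding above: if skb->data lands at an
 * address ending in ...0x03, then off = ((0x03 + 7) & ~7) - 0x03 = 5
 * and skb_reserve() advances data to the next 8-byte boundary.  The
 * extra 8+8 bytes requested from netdev_alloc_skb() leave room for
 * this shift plus the 8-byte round-up of the copy length.
 */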
static inline void vnet_fullcsum(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int offset = skb_transport_offset(skb);

	if (skb->protocol != htons(ETH_P_IP))
		return;
	if (iph->protocol != IPPROTO_TCP &&
	    iph->protocol != IPPROTO_UDP)
		return;
	skb->ip_summed = CHECKSUM_NONE;
	skb->csum = 0;
	if (iph->protocol == IPPROTO_TCP) {
		struct tcphdr *ptcp = tcp_hdr(skb);

		ptcp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						skb->len - offset, IPPROTO_TCP,
						skb->csum);
	} else if (iph->protocol == IPPROTO_UDP) {
		struct udphdr *pudp = udp_hdr(skb);

		pudp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						skb->len - offset, IPPROTO_UDP,
						skb->csum);
	}
}
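/* csum_tcpudp_magic() folds the IPv4 pseudo-header (saddr, daddr,
 * protocol and segment length) into the one's-complement sum produced
 * by skb_checksum(), yielding the final 16-bit transport checksum.
 * Zeroing ->check first is essential: the stale checksum must not
 * feed into the new sum.
 */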
static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
{
	struct net_device *dev = port->vp->dev;
	unsigned int len = desc->size;
	unsigned int copy_len;
	struct sk_buff *skb;
	int maxlen;
	int err;

	err = -EMSGSIZE;
	if (port->tso && port->tsolen > port->rmtu)
		maxlen = port->tsolen;
	else
		maxlen = port->rmtu;
	if (unlikely(len < ETH_ZLEN || len > maxlen)) {
		dev->stats.rx_length_errors++;
		goto out_dropped;
	}

	skb = alloc_and_align_skb(dev, len);
	err = -ENOMEM;
	if (unlikely(!skb)) {
		dev->stats.rx_missed_errors++;
		goto out_dropped;
	}

	copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
	skb_put(skb, copy_len);
	err = ldc_copy(port->vio.lp, LDC_COPY_IN,
		       skb->data, copy_len, 0,
		       desc->cookies, desc->ncookies);
	if (unlikely(err < 0)) {
		dev->stats.rx_frame_errors++;
		goto out_free_skb;
	}

	skb_pull(skb, VNET_PACKET_SKIP);
	skb_trim(skb, len);
	skb->protocol = eth_type_trans(skb, dev);

	if (vio_version_after_eq(&port->vio, 1, 8)) {
		struct vio_net_dext *dext = vio_net_ext(desc);

		skb_reset_network_header(skb);

		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
			if (skb->protocol == htons(ETH_P_IP)) {
				struct iphdr *iph = ip_hdr(skb);

				iph->check = 0;
				ip_send_check(iph);
			}
		}
		if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
		    skb->ip_summed == CHECKSUM_NONE) {
			if (skb->protocol == htons(ETH_P_IP)) {
				struct iphdr *iph = ip_hdr(skb);
				int ihl = iph->ihl * 4;

				skb_reset_transport_header(skb);
				skb_set_transport_header(skb, ihl);
				vnet_fullcsum(skb);
			}
		}
		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_level = 0;
			if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK)
				skb->csum_level = 1;
		}
	}

	skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL;

	dev->stats.rx_packets++;
	dev->stats.rx_bytes += len;
	napi_gro_receive(&port->napi, skb);
	return 0;

out_free_skb:
	kfree_skb(skb);

out_dropped:
	dev->stats.rx_dropped++;
	return err;
}
static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
			 u32 start, u32 end, u8 vio_dring_state)
{
	struct vio_dring_data hdr = {
		.tag = {
			.type		= VIO_TYPE_DATA,
			.stype		= VIO_SUBTYPE_ACK,
			.stype_env	= VIO_DRING_DATA,
			.sid		= vio_send_sid(&port->vio),
		},
		.dring_ident	= dr->ident,
		.start_idx	= start,
		.end_idx	= end,
		.state		= vio_dring_state,
	};
	int err, delay;
	int retries = 0;

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES) {
			pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
				port->raddr[0], port->raddr[1],
				port->raddr[2], port->raddr[3],
				port->raddr[4], port->raddr[5]);
			break;
		}
	} while (err == -EAGAIN);

	if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
		port->stop_rx_idx = end;
		port->stop_rx = true;
	} else {
		port->stop_rx_idx = 0;
		port->stop_rx = false;
	}

	return err;
}
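/* If the VIO_DRING_STOPPED ACK could not be sent, stop_rx/stop_rx_idx
 * remember the pending index so that __vnet_tx_trigger() can re-send
 * the missed ACK before raising its next "start" trigger.
 */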
static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
					struct vio_dring_state *dr,
					u32 index)
{
	struct vio_net_desc *desc = port->vio.desc_buf;
	int err;

	err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return ERR_PTR(err);

	return desc;
}
static int put_rx_desc(struct vnet_port *port,
		       struct vio_dring_state *dr,
		       struct vio_net_desc *desc,
		       u32 index)
{
	int err;

	err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return err;

	return 0;
}
static int vnet_walk_rx_one(struct vnet_port *port,
			    struct vio_dring_state *dr,
			    u32 index, int *needs_ack)
{
	struct vio_net_desc *desc = get_rx_desc(port, dr, index);
	struct vio_driver_state *vio = &port->vio;
	int err;

	BUG_ON(desc == NULL);
	if (IS_ERR(desc))
		return PTR_ERR(desc);

	if (desc->hdr.state != VIO_DESC_READY)
		return 1;

	dma_rmb();

	viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
	       desc->hdr.state, desc->hdr.ack,
	       desc->size, desc->ncookies,
	       desc->cookies[0].cookie_addr,
	       desc->cookies[0].cookie_size);

	err = vnet_rx_one(port, desc);
	if (err == -ECONNRESET)
		return err;
	desc->hdr.state = VIO_DESC_DONE;
	err = put_rx_desc(port, dr, desc, index);
	if (err < 0)
		return err;
	*needs_ack = desc->hdr.ack;
	return 0;
}
static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
			u32 start, u32 end, int *npkts, int budget)
{
	struct vio_driver_state *vio = &port->vio;
	int ack_start = -1, ack_end = -1;
	bool send_ack = true;

	end = (end == (u32) -1) ? vio_dring_prev(dr, start)
				: vio_dring_next(dr, end);

	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);

	while (start != end) {
		int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);
		if (err == -ECONNRESET)
			return err;
		if (err != 0)
			break;
		(*npkts)++;
		if (ack_start == -1)
			ack_start = start;
		ack_end = start;
		start = vio_dring_next(dr, start);
		if (ack && start != end) {
			err = vnet_send_ack(port, dr, ack_start, ack_end,
					    VIO_DRING_ACTIVE);
			if (err == -ECONNRESET)
				return err;
			ack_start = -1;
		}
		if ((*npkts) >= budget) {
			send_ack = false;
			break;
		}
	}
	if (unlikely(ack_start == -1))
		ack_start = ack_end = vio_dring_prev(dr, start);
	if (send_ack) {
		port->napi_resume = false;
		return vnet_send_ack(port, dr, ack_start, ack_end,
				     VIO_DRING_STOPPED);
	} else {
		port->napi_resume = true;
		port->napi_stop_idx = ack_end;
		return 1;
	}
}
static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts,
		   int budget)
{
	struct vio_dring_data *pkt = msgbuf;
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
	struct vio_driver_state *vio = &port->vio;

	viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
	       pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;
	if (unlikely(pkt->seq != dr->rcv_nxt)) {
		pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
		       pkt->seq, dr->rcv_nxt);
		return 0;
	}

	if (!port->napi_resume)
		dr->rcv_nxt++;

	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */

	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx,
			    npkts, budget);
}
static int idx_is_pending(struct vio_dring_state *dr, u32 end)
{
	u32 idx = dr->cons;
	int found = 0;

	while (idx != dr->prod) {
		if (idx == end) {
			found = 1;
			break;
		}
		idx = vio_dring_next(dr, idx);
	}
	return found;
}
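/* Example: with cons = 3 and prod = 7, indices 3..6 are pending, so
 * idx_is_pending(dr, 5) returns 1 while idx_is_pending(dr, 7) returns
 * 0 -- the producer slot itself is not yet pending.
 */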
static int vnet_ack(struct vnet_port *port, void *msgbuf)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data *pkt = msgbuf;
	struct net_device *dev;
	struct vnet *vp;
	u32 end;
	struct vio_net_desc *desc;
	struct netdev_queue *txq;

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;

	end = pkt->end_idx;
	vp = port->vp;
	dev = vp->dev;
	netif_tx_lock(dev);
	if (unlikely(!idx_is_pending(dr, end))) {
		netif_tx_unlock(dev);
		return 0;
	}

	/* sync for race conditions with vnet_start_xmit() and tell xmit it
	 * is time to send a trigger.
	 */
	dr->cons = vio_dring_next(dr, end);
	desc = vio_dring_entry(dr, dr->cons);
	if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
		/* vnet_start_xmit() just populated this dring but missed
		 * sending the "start" LDC message to the consumer.
		 * Send a "start" trigger on its behalf.
		 */
		if (__vnet_tx_trigger(port, dr->cons) > 0)
			port->start_cons = false;
		else
			port->start_cons = true;
	} else {
		port->start_cons = true;
	}
	netif_tx_unlock(dev);

	txq = netdev_get_tx_queue(dev, port->q_index);
	if (unlikely(netif_tx_queue_stopped(txq) &&
		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
		return 1;

	return 0;
}
static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
	/* XXX just reset or similar XXX */
	return 0;
}

static int handle_mcast(struct vnet_port *port, void *msgbuf)
{
	struct vio_net_mcast_info *pkt = msgbuf;

	if (pkt->tag.stype != VIO_SUBTYPE_ACK)
		pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
		       port->vp->dev->name,
		       pkt->tag.type,
		       pkt->tag.stype,
		       pkt->tag.stype_env,
		       pkt->tag.sid);

	return 0;
}
/* Got back a STOPPED LDC message on port. If the queue is stopped,
 * wake it up so that we'll send out another START message at the
 * next TX.
 */
static void maybe_tx_wakeup(struct vnet_port *port)
{
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(port->vp->dev, port->q_index);
	__netif_tx_lock(txq, smp_processor_id());
	if (likely(netif_tx_queue_stopped(txq))) {
		struct vio_dring_state *dr;

		dr = &port->vio.drings[VIO_DRIVER_TX_RING];
		netif_tx_wake_queue(txq);
	}
	__netif_tx_unlock(txq);
}
static inline bool port_is_up(struct vnet_port *vnet)
{
	struct vio_driver_state *vio = &vnet->vio;

	return !!(vio->hs_state & VIO_HS_COMPLETE);
}
static int vnet_event_napi(struct vnet_port *port, int budget)
{
	struct vio_driver_state *vio = &port->vio;
	int tx_wakeup, err;
	int npkts = 0;
	int event = (port->rx_event & LDC_EVENT_RESET);

ldc_ctrl:
	if (unlikely(event == LDC_EVENT_RESET ||
		     event == LDC_EVENT_UP)) {
		vio_link_state_change(vio, event);

		if (event == LDC_EVENT_RESET) {
			vnet_port_reset(port);
			vio_port_up(vio);
		}
		port->rx_event = 0;
		return 0;
	}
	/* We may have multiple LDC events in rx_event. Unroll send_events() */
	event = (port->rx_event & LDC_EVENT_UP);
	port->rx_event &= ~(LDC_EVENT_RESET|LDC_EVENT_UP);
	if (event == LDC_EVENT_UP)
		goto ldc_ctrl;
	event = port->rx_event;
	if (!(event & LDC_EVENT_DATA_READY))
		return 0;

	/* we dont expect any other bits than RESET, UP, DATA_READY */
	BUG_ON(event != LDC_EVENT_DATA_READY);

	tx_wakeup = err = 0;

	while (1) {
		union {
			struct vio_msg_tag tag;
			u64 raw[8];
		} msgbuf;

		if (port->napi_resume) {
			struct vio_dring_data *pkt =
				(struct vio_dring_data *)&msgbuf;
			struct vio_dring_state *dr =
				&port->vio.drings[VIO_DRIVER_RX_RING];

			pkt->tag.type = VIO_TYPE_DATA;
			pkt->tag.stype = VIO_SUBTYPE_INFO;
			pkt->tag.stype_env = VIO_DRING_DATA;
			pkt->seq = dr->rcv_nxt;
			pkt->start_idx = vio_dring_next(dr, port->napi_stop_idx);
			pkt->end_idx = -1;
		} else {
			err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
			if (unlikely(err < 0)) {
				if (err == -ECONNRESET)
					vio_conn_reset(vio);
				break;
			}
			if (err == 0)
				break;
			viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
			       msgbuf.tag.type,
			       msgbuf.tag.stype,
			       msgbuf.tag.stype_env,
			       msgbuf.tag.sid);
			err = vio_validate_sid(vio, &msgbuf.tag);
			if (err < 0)
				break;
		}

		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
				if (!port_is_up(port)) {
					/* failures like handshake_failure()
					 * may have cleaned up dring, but
					 * NAPI polling may bring us here.
					 */
					err = -ECONNRESET;
					break;
				}
				err = vnet_rx(port, &msgbuf, &npkts, budget);
				if (npkts >= budget)
					break;
				if (npkts == 0)
					break;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
				err = vnet_ack(port, &msgbuf);
				if (err > 0)
					tx_wakeup |= err;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
				err = vnet_nack(port, &msgbuf);
			}
		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
			if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
				err = handle_mcast(port, &msgbuf);
			else
				err = vio_control_pkt_engine(vio, &msgbuf);
			if (err)
				break;
		} else {
			err = vnet_handle_unknown(port, &msgbuf);
		}
		if (err == -ECONNRESET)
			break;
	}
	if (unlikely(tx_wakeup && err != -ECONNRESET))
		maybe_tx_wakeup(port);
	return npkts;
}
static int vnet_poll(struct napi_struct *napi, int budget)
{
	struct vnet_port *port = container_of(napi, struct vnet_port, napi);
	struct vio_driver_state *vio = &port->vio;
	int processed = vnet_event_napi(port, budget);

	if (processed < budget) {
		napi_complete(napi);
		port->rx_event &= ~LDC_EVENT_DATA_READY;
		vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED);
	}
	return processed;
}
static void vnet_event(void *arg, int event)
{
	struct vnet_port *port = arg;
	struct vio_driver_state *vio = &port->vio;

	port->rx_event |= event;
	vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED);
	napi_schedule(&port->napi);
}
static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data hdr = {
		.tag = {
			.type		= VIO_TYPE_DATA,
			.stype		= VIO_SUBTYPE_INFO,
			.stype_env	= VIO_DRING_DATA,
			.sid		= vio_send_sid(&port->vio),
		},
		.dring_ident	= dr->ident,
		.start_idx	= start,
		.end_idx	= (u32) -1,
	};
	int err, delay;
	int retries = 0;

	if (port->stop_rx) {
		err = vnet_send_ack(port,
				    &port->vio.drings[VIO_DRIVER_RX_RING],
				    port->stop_rx_idx, -1,
				    VIO_DRING_STOPPED);
		if (err <= 0)
			return err;
	}

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES)
			break;
	} while (err == -EAGAIN);

	return err;
}
static struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
{
	unsigned int hash = vnet_hashfn(skb->data);
	struct hlist_head *hp = &vp->port_hash[hash];
	struct vnet_port *port;

	hlist_for_each_entry_rcu(port, hp, hash) {
		if (!port_is_up(port))
			continue;
		if (ether_addr_equal(port->raddr, skb->data))
			return port;
	}
	list_for_each_entry_rcu(port, &vp->port_list, list) {
		if (!port->switch_port)
			continue;
		if (!port_is_up(port))
			continue;
		return port;
	}
	return NULL;
}
static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
					  unsigned *pending)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *skb = NULL;
	int i, txi;

	*pending = 0;

	txi = dr->prod;
	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		--txi;
		if (txi < 0)
			txi = VNET_TX_RING_SIZE-1;

		d = vio_dring_entry(dr, txi);

		if (d->hdr.state == VIO_DESC_READY) {
			(*pending)++;
			continue;
		}
		if (port->tx_bufs[txi].skb) {
			if (d->hdr.state != VIO_DESC_DONE)
				pr_notice("invalid ring buffer state %d\n",
					  d->hdr.state);
			BUG_ON(port->tx_bufs[txi].skb->next);

			port->tx_bufs[txi].skb->next = skb;
			skb = port->tx_bufs[txi].skb;
			port->tx_bufs[txi].skb = NULL;

			ldc_unmap(port->vio.lp,
				  port->tx_bufs[txi].cookies,
				  port->tx_bufs[txi].ncookies);
		} else if (d->hdr.state == VIO_DESC_FREE)
			break;
		d->hdr.state = VIO_DESC_FREE;
	}
	return skb;
}
static inline void vnet_free_skbs(struct sk_buff *skb)
{
	struct sk_buff *next;

	while (skb) {
		next = skb->next;
		skb->next = NULL;
		dev_kfree_skb(skb);
		skb = next;
	}
}
static void vnet_clean_timer_expire(unsigned long port0)
{
	struct vnet_port *port = (struct vnet_port *)port0;
	struct sk_buff *freeskbs;
	unsigned pending;

	netif_tx_lock(port->vp->dev);
	freeskbs = vnet_clean_tx_ring(port, &pending);
	netif_tx_unlock(port->vp->dev);

	vnet_free_skbs(freeskbs);

	if (pending)
		(void)mod_timer(&port->clean_timer,
				jiffies + VNET_CLEAN_TIMEOUT);
	else
		del_timer(&port->clean_timer);
}
static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb,
			       struct ldc_trans_cookie *cookies, int ncookies,
			       unsigned int map_perm)
{
	int i, nc, err, blen;

	/* map the linear header */
	blen = skb_headlen(skb);
	if (blen < ETH_ZLEN)
		blen = ETH_ZLEN;
	blen += VNET_PACKET_SKIP;
	blen += 8 - (blen & 7);

	err = ldc_map_single(lp, skb->data-VNET_PACKET_SKIP, blen, cookies,
			     ncookies, map_perm);
	if (err < 0)
		return err;
	nc = err;

	/* map each page fragment */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
		u8 *vaddr;

		if (nc < ncookies) {
			vaddr = kmap_atomic(skb_frag_page(f));
			blen = skb_frag_size(f);
			blen += 8 - (blen & 7);
			err = ldc_map_single(lp, vaddr + f->page_offset,
					     blen, cookies + nc, ncookies - nc,
					     map_perm);
			kunmap_atomic(vaddr);
		} else {
			err = -EMSGSIZE;
		}

		if (err < 0) {
			ldc_unmap(lp, cookies, nc);
			return err;
		}
		nc += err;
	}
	return nc;
}
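/* ldc_map_single() returns the number of transfer cookies it consumed,
 * so nc accumulates the total across the linear header and every page
 * fragment; that running total is what vnet_start_xmit() stores in
 * tx_bufs[txi].ncookies and copies into the descriptor.
 */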
static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
{
	struct sk_buff *nskb;
	int i, len, pad, docopy;

	len = skb->len;
	pad = 0;
	if (len < ETH_ZLEN) {
		pad += ETH_ZLEN - skb->len;
		len += pad;
	}
	len += VNET_PACKET_SKIP;
	pad += 8 - (len & 7);

	/* make sure we have enough cookies and alignment in every frag */
	docopy = skb_shinfo(skb)->nr_frags >= ncookies;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		docopy |= f->page_offset & 7;
	}
	if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
	    skb_tailroom(skb) < pad ||
	    skb_headroom(skb) < VNET_PACKET_SKIP || docopy) {
		int start = 0, offset;
		__wsum csum;

		len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN;
		nskb = alloc_and_align_skb(skb->dev, len);
		if (nskb == NULL) {
			dev_kfree_skb(skb);
			return NULL;
		}
		skb_reserve(nskb, VNET_PACKET_SKIP);

		nskb->protocol = skb->protocol;
		offset = skb_mac_header(skb) - skb->data;
		skb_set_mac_header(nskb, offset);
		offset = skb_network_header(skb) - skb->data;
		skb_set_network_header(nskb, offset);
		offset = skb_transport_header(skb) - skb->data;
		skb_set_transport_header(nskb, offset);

		nskb->csum_offset = skb->csum_offset;
		nskb->ip_summed = skb->ip_summed;

		if (skb->ip_summed == CHECKSUM_PARTIAL)
			start = skb_checksum_start_offset(skb);
		if (start) {
			struct iphdr *iph = ip_hdr(nskb);
			int offset = start + nskb->csum_offset;

			if (skb_copy_bits(skb, 0, nskb->data, start)) {
				dev_kfree_skb(nskb);
				dev_kfree_skb(skb);
				return NULL;
			}
			*(__sum16 *)(skb->data + offset) = 0;
			csum = skb_copy_and_csum_bits(skb, start,
						      nskb->data + start,
						      skb->len - start, 0);
			if (iph->protocol == IPPROTO_TCP ||
			    iph->protocol == IPPROTO_UDP) {
				csum = csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - start,
							 iph->protocol, csum);
			}
			*(__sum16 *)(nskb->data + offset) = csum;

			nskb->ip_summed = CHECKSUM_NONE;
		} else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
			dev_kfree_skb(nskb);
			dev_kfree_skb(skb);
			return NULL;
		}
		(void)skb_put(nskb, skb->len);
		if (skb_is_gso(skb)) {
			skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size;
			skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
		}
		nskb->queue_mapping = skb->queue_mapping;
		dev_kfree_skb(skb);
		skb = nskb;
	}
	return skb;
}
static u16
vnet_select_queue(struct net_device *dev, struct sk_buff *skb,
		  void *accel_priv, select_queue_fallback_t fallback)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port = __tx_port_find(vp, skb);

	if (port == NULL)
		return 0;

	return port->q_index;
}
static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev);
static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb)
{
	struct net_device *dev = port->vp->dev;
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *segs;
	int maclen, datalen;
	int status;
	int gso_size, gso_type, gso_segs;
	int hlen = skb_transport_header(skb) - skb_mac_header(skb);
	int proto = IPPROTO_IP;

	if (skb->protocol == htons(ETH_P_IP))
		proto = ip_hdr(skb)->protocol;
	else if (skb->protocol == htons(ETH_P_IPV6))
		proto = ipv6_hdr(skb)->nexthdr;

	if (proto == IPPROTO_TCP)
		hlen += tcp_hdr(skb)->doff * 4;
	else if (proto == IPPROTO_UDP)
		hlen += sizeof(struct udphdr);
	else {
		pr_err("vnet_handle_offloads GSO with unknown transport "
		       "protocol %d tproto %d\n", skb->protocol, proto);
		hlen = 128; /* XXX */
	}
	datalen = port->tsolen - hlen;

	gso_size = skb_shinfo(skb)->gso_size;
	gso_type = skb_shinfo(skb)->gso_type;
	gso_segs = skb_shinfo(skb)->gso_segs;

	if (port->tso && gso_size < datalen)
		gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen);

	if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) {
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(dev, port->q_index);
		netif_tx_stop_queue(txq);
		if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs)
			return NETDEV_TX_BUSY;
		netif_tx_wake_queue(txq);
	}

	maclen = skb_network_header(skb) - skb_mac_header(skb);
	skb_pull(skb, maclen);

	if (port->tso && gso_size < datalen) {
		if (skb_unclone(skb, GFP_ATOMIC))
			goto out_dropped;

		/* segment to TSO size */
		skb_shinfo(skb)->gso_size = datalen;
		skb_shinfo(skb)->gso_segs = gso_segs;
	}
	segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO);
	if (IS_ERR(segs))
		goto out_dropped;

	skb_push(skb, maclen);
	skb_reset_mac_header(skb);

	status = 0;
	while (segs) {
		struct sk_buff *curr = segs;

		segs = segs->next;
		curr->next = NULL;
		if (port->tso && curr->len > dev->mtu) {
			skb_shinfo(curr)->gso_size = gso_size;
			skb_shinfo(curr)->gso_type = gso_type;
			skb_shinfo(curr)->gso_segs =
				DIV_ROUND_UP(curr->len - hlen, gso_size);
		} else {
			skb_shinfo(curr)->gso_size = 0;
		}

		skb_push(curr, maclen);
		skb_reset_mac_header(curr);
		memcpy(skb_mac_header(curr), skb_mac_header(skb),
		       maclen);
		curr->csum_start = skb_transport_header(curr) - curr->head;
		if (ip_hdr(curr)->protocol == IPPROTO_TCP)
			curr->csum_offset = offsetof(struct tcphdr, check);
		else if (ip_hdr(curr)->protocol == IPPROTO_UDP)
			curr->csum_offset = offsetof(struct udphdr, check);

		if (!(status & NETDEV_TX_MASK))
			status = vnet_start_xmit(curr, dev);
		if (status & NETDEV_TX_MASK)
			dev_kfree_skb_any(curr);
	}

	if (!(status & NETDEV_TX_MASK))
		dev_kfree_skb_any(skb);
	return status;

out_dropped:
	dev->stats.tx_dropped++;
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
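/* Worked example of the re-segmentation above, assuming a negotiated
 * port->tsolen of 8192 and hlen = 54 (14 ethernet + 20 IP + 20 TCP):
 * datalen = 8138, so a 64KB GSO skb needs
 * DIV_ROUND_UP(65536 - 54, 8138) = 9 ring descriptors instead of the
 * ~45 MTU-sized segments a non-TSO peer would require.
 */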
static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port = NULL;
	struct vio_dring_state *dr;
	struct vio_net_desc *d;
	unsigned int len;
	struct sk_buff *freeskbs = NULL;
	int i, err, txi;
	unsigned pending = 0;
	struct netdev_queue *txq;

	rcu_read_lock();
	port = __tx_port_find(vp, skb);
	if (unlikely(!port)) {
		rcu_read_unlock();
		goto out_dropped;
	}

	if (skb_is_gso(skb) && skb->len > port->tsolen) {
		err = vnet_handle_offloads(port, skb);
		rcu_read_unlock();
		return err;
	}

	if (!skb_is_gso(skb) && skb->len > port->rmtu) {
		unsigned long localmtu = port->rmtu - ETH_HLEN;

		if (vio_version_after_eq(&port->vio, 1, 3))
			localmtu -= VLAN_HLEN;

		if (skb->protocol == htons(ETH_P_IP)) {
			struct flowi4 fl4;
			struct rtable *rt = NULL;

			memset(&fl4, 0, sizeof(fl4));
			fl4.flowi4_oif = dev->ifindex;
			fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
			fl4.daddr = ip_hdr(skb)->daddr;
			fl4.saddr = ip_hdr(skb)->saddr;

			rt = ip_route_output_key(dev_net(dev), &fl4);
			if (!IS_ERR(rt)) {
				skb_dst_set(skb, &rt->dst);
				icmp_send(skb, ICMP_DEST_UNREACH,
					  ICMP_FRAG_NEEDED,
					  htonl(localmtu));
			}
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
#endif
		goto out_dropped;
	}

	skb = vnet_skb_shape(skb, 2);

	if (unlikely(!skb))
		goto out_dropped;

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		vnet_fullcsum(skb);

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	i = skb_get_queue_mapping(skb);
	txq = netdev_get_tx_queue(dev, i);
	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
		if (!netif_tx_queue_stopped(txq)) {
			netif_tx_stop_queue(txq);

			/* This is a hard error, log it. */
			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
			dev->stats.tx_errors++;
		}
		rcu_read_unlock();
		return NETDEV_TX_BUSY;
	}

	d = vio_dring_cur(dr);

	txi = dr->prod;

	freeskbs = vnet_clean_tx_ring(port, &pending);

	BUG_ON(port->tx_bufs[txi].skb);

	len = skb->len;
	if (len < ETH_ZLEN)
		len = ETH_ZLEN;

	err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2,
			   (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
	if (err < 0) {
		netdev_info(dev, "tx buffer map error %d\n", err);
		goto out_dropped;
	}

	port->tx_bufs[txi].skb = skb;
	skb = NULL;
	port->tx_bufs[txi].ncookies = err;

	/* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
	 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
	 * the protocol itself does not require it as long as the peer
	 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
	 *
	 * An ACK for every packet in the ring is expensive as the
	 * sending of LDC messages is slow and affects performance.
	 */
	d->hdr.ack = VIO_ACK_DISABLE;
	d->size = len;
	d->ncookies = port->tx_bufs[txi].ncookies;
	for (i = 0; i < d->ncookies; i++)
		d->cookies[i] = port->tx_bufs[txi].cookies[i];
	if (vio_version_after_eq(&port->vio, 1, 7)) {
		struct vio_net_dext *dext = vio_net_ext(d);

		memset(dext, 0, sizeof(*dext));
		if (skb_is_gso(port->tx_bufs[txi].skb)) {
			dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
					     ->gso_size;
			dext->flags |= VNET_PKT_IPV4_LSO;
		}
		if (vio_version_after_eq(&port->vio, 1, 8) &&
		    !port->switch_port) {
			dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
			dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
		}
	}

	/* This has to be a non-SMP write barrier because we are writing
	 * to memory which is shared with the peer LDOM.
	 */
	dma_wmb();

	d->hdr.state = VIO_DESC_READY;

	/* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
	 * to notify the consumer that some descriptors are READY.
	 * After that "start" trigger, no additional triggers are needed until
	 * a DRING_STOPPED is received from the consumer. The dr->cons field
	 * (set up by vnet_ack()) has the value of the next dring index
	 * that has not yet been ack-ed. We send a "start" trigger here
	 * if, and only if, start_cons is true (reset it afterward). Conversely,
	 * vnet_ack() should check if the dring corresponding to cons
	 * is marked READY, but start_cons was false.
	 * If so, vnet_ack() should send out the missed "start" trigger.
	 *
	 * Note that the dma_wmb() above makes sure the cookies et al. are
	 * not globally visible before the VIO_DESC_READY, and that the
	 * stores are ordered correctly by the compiler. The consumer will
	 * not proceed until the VIO_DESC_READY is visible assuring that
	 * the consumer does not observe anything related to descriptors
	 * out of order. The HV trap from the LDC start trigger is the
	 * producer to consumer announcement that work is available to the
	 * consumer.
	 */
	if (!port->start_cons)
		goto ldc_start_done; /* previous trigger suffices */

	err = __vnet_tx_trigger(port, dr->cons);
	if (unlikely(err < 0)) {
		netdev_info(dev, "TX trigger error %d\n", err);
		d->hdr.state = VIO_DESC_FREE;
		skb = port->tx_bufs[txi].skb;
		port->tx_bufs[txi].skb = NULL;
		dev->stats.tx_carrier_errors++;
		goto out_dropped;
	}

ldc_start_done:
	port->start_cons = false;

	dev->stats.tx_packets++;
	dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;

	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
		netif_tx_stop_queue(txq);
		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
			netif_tx_wake_queue(txq);
	}

	(void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
	rcu_read_unlock();

	vnet_free_skbs(freeskbs);

	return NETDEV_TX_OK;

out_dropped:
	if (pending)
		(void)mod_timer(&port->clean_timer,
				jiffies + VNET_CLEAN_TIMEOUT);
	else if (port)
		del_timer(&port->clean_timer);
	if (port)
		rcu_read_unlock();
	if (skb)
		dev_kfree_skb(skb);
	vnet_free_skbs(freeskbs);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
static void vnet_tx_timeout(struct net_device *dev)
{
	/* XXX Implement me XXX */
}
static int vnet_open(struct net_device *dev)
{
	netif_carrier_on(dev);
	netif_tx_start_all_queues(dev);

	return 0;
}

static int vnet_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	netif_carrier_off(dev);

	return 0;
}
static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
{
	struct vnet_mcast_entry *m;

	for (m = vp->mcast_list; m; m = m->next) {
		if (ether_addr_equal(m->addr, addr))
			return m;
	}
	return NULL;
}
static void __update_mc_list(struct vnet *vp, struct net_device *dev)
{
	struct netdev_hw_addr *ha;

	netdev_for_each_mc_addr(ha, dev) {
		struct vnet_mcast_entry *m;

		m = __vnet_mc_find(vp, ha->addr);
		if (m) {
			m->hit = 1;
			continue;
		}

		if (!m) {
			m = kzalloc(sizeof(*m), GFP_ATOMIC);
			if (!m)
				continue;
			memcpy(m->addr, ha->addr, ETH_ALEN);
			m->hit = 1;

			m->next = vp->mcast_list;
			vp->mcast_list = m;
		}
	}
}
static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
{
	struct vio_net_mcast_info info;
	struct vnet_mcast_entry *m, **pp;
	int n_addrs;

	memset(&info, 0, sizeof(info));

	info.tag.type = VIO_TYPE_CTRL;
	info.tag.stype = VIO_SUBTYPE_INFO;
	info.tag.stype_env = VNET_MCAST_INFO;
	info.tag.sid = vio_send_sid(&port->vio);
	info.set = 1;

	n_addrs = 0;
	for (m = vp->mcast_list; m; m = m->next) {
		if (m->sent)
			continue;
		m->sent = 1;
		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;

			(void) vio_ldc_send(&port->vio, &info,
					    sizeof(info));
			n_addrs = 0;
		}
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void) vio_ldc_send(&port->vio, &info, sizeof(info));
		n_addrs = 0;
	}

	info.set = 0;

	pp = &vp->mcast_list;
	while ((m = *pp) != NULL) {
		if (m->hit) {
			m->hit = 0;
			pp = &m->next;
			continue;
		}

		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;
			(void) vio_ldc_send(&port->vio, &info,
					    sizeof(info));
			n_addrs = 0;
		}

		*pp = m->next;
		kfree(m);
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void) vio_ldc_send(&port->vio, &info, sizeof(info));
	}
}
static void vnet_set_rx_mode(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port;

	rcu_read_lock();
	list_for_each_entry_rcu(port, &vp->port_list, list) {

		if (port->switch_port) {
			__update_mc_list(vp, dev);
			__send_mc_list(vp, port);
			break;
		}
	}
	rcu_read_unlock();
}
static int vnet_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < 68 || new_mtu > 65535)
		return -EINVAL;

	dev->mtu = new_mtu;
	return 0;
}
static int vnet_set_mac_addr(struct net_device *dev, void *p)
{
	return -EINVAL;
}
static void vnet_get_drvinfo(struct net_device *dev,
			     struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
}
static u32 vnet_get_msglevel(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	return vp->msg_enable;
}

static void vnet_set_msglevel(struct net_device *dev, u32 value)
{
	struct vnet *vp = netdev_priv(dev);
	vp->msg_enable = value;
}
static const struct ethtool_ops vnet_ethtool_ops = {
	.get_drvinfo		= vnet_get_drvinfo,
	.get_msglevel		= vnet_get_msglevel,
	.set_msglevel		= vnet_set_msglevel,
	.get_link		= ethtool_op_get_link,
};
static void vnet_port_free_tx_bufs(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	int i;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

	if (dr->base == NULL)
		return;

	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
		struct vio_net_desc *d;
		void *skb = port->tx_bufs[i].skb;

		if (!skb)
			continue;

		d = vio_dring_entry(dr, i);

		ldc_unmap(port->vio.lp,
			  port->tx_bufs[i].cookies,
			  port->tx_bufs[i].ncookies);
		dev_kfree_skb(skb);
		port->tx_bufs[i].skb = NULL;
		d->hdr.state = VIO_DESC_FREE;
	}
	ldc_free_exp_dring(port->vio.lp, dr->base,
			   (dr->entry_size * dr->num_entries),
			   dr->cookies, dr->ncookies);
	dr->base = NULL;
	dr->entry_size = 0;
	dr->num_entries = 0;
	dr->pending = 0;
	dr->ncookies = 0;
}
static void vnet_port_reset(struct vnet_port *port)
{
	del_timer(&port->clean_timer);
	vnet_port_free_tx_bufs(port);
	port->rmtu = 0;
	port->tso = true;
	port->tsolen = 0;
}
static int vnet_port_alloc_tx_ring(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	unsigned long len, elen;
	int i, err, ncookies;
	void *dring;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

	elen = sizeof(struct vio_net_desc) +
	       sizeof(struct ldc_trans_cookie) * 2;
	if (vio_version_after_eq(&port->vio, 1, 7))
		elen += sizeof(struct vio_net_dext);
	len = VNET_TX_RING_SIZE * elen;

	ncookies = VIO_MAX_RING_COOKIES;
	dring = ldc_alloc_exp_dring(port->vio.lp, len,
				    dr->cookies, &ncookies,
				    (LDC_MAP_SHADOW |
				     LDC_MAP_DIRECT |
				     LDC_MAP_RW));
	if (IS_ERR(dring)) {
		err = PTR_ERR(dring);
		goto err_out;
	}

	dr->base = dring;
	dr->entry_size = elen;
	dr->num_entries = VNET_TX_RING_SIZE;
	dr->prod = dr->cons = 0;
	port->start_cons  = true; /* need an initial trigger */
	dr->pending = VNET_TX_RING_SIZE;
	dr->ncookies = ncookies;

	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		d = vio_dring_entry(dr, i);
		d->hdr.state = VIO_DESC_FREE;
	}
	return 0;

err_out:
	vnet_port_free_tx_bufs(port);

	return err;
}
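/* Sizing example for the ring above, for a v1.7+ session: each slot is
 * one vio_net_desc plus two ldc_trans_cookie entries plus the
 * vio_net_dext extension, and VNET_TX_RING_SIZE of those are exported
 * through at most VIO_MAX_RING_COOKIES cookies.  Exact byte counts
 * depend on the structure layouts in asm/vio.h.
 */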
#ifdef CONFIG_NET_POLL_CONTROLLER
static void vnet_poll_controller(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	if (!list_empty(&vp->port_list)) {
		port = list_entry(vp->port_list.next, struct vnet_port, list);
		napi_schedule(&port->napi);
	}
	spin_unlock_irqrestore(&vp->lock, flags);
}
#endif
static LIST_HEAD(vnet_list);
static DEFINE_MUTEX(vnet_list_mutex);
static const struct net_device_ops vnet_ops = {
	.ndo_open		= vnet_open,
	.ndo_stop		= vnet_close,
	.ndo_set_rx_mode	= vnet_set_rx_mode,
	.ndo_set_mac_address	= vnet_set_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_tx_timeout		= vnet_tx_timeout,
	.ndo_change_mtu		= vnet_change_mtu,
	.ndo_start_xmit		= vnet_start_xmit,
	.ndo_select_queue	= vnet_select_queue,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= vnet_poll_controller,
#endif
};
static struct vnet *vnet_new(const u64 *local_mac,
			     struct vio_dev *vdev)
{
	struct net_device *dev;
	struct vnet *vp;
	int err, i;

	dev = alloc_etherdev_mqs(sizeof(*vp), VNET_MAX_TXQS, 1);
	if (!dev)
		return ERR_PTR(-ENOMEM);
	dev->needed_headroom = VNET_PACKET_SKIP + 8;
	dev->needed_tailroom = 8;

	for (i = 0; i < ETH_ALEN; i++)
		dev->dev_addr[i] = (*local_mac >> (5 - i) * 8) & 0xff;

	vp = netdev_priv(dev);

	spin_lock_init(&vp->lock);
	vp->dev = dev;

	INIT_LIST_HEAD(&vp->port_list);
	for (i = 0; i < VNET_PORT_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&vp->port_hash[i]);
	INIT_LIST_HEAD(&vp->list);
	vp->local_mac = *local_mac;

	dev->netdev_ops = &vnet_ops;
	dev->ethtool_ops = &vnet_ethtool_ops;
	dev->watchdog_timeo = VNET_TX_TIMEOUT;

	dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE |
			   NETIF_F_HW_CSUM | NETIF_F_SG;
	dev->features = dev->hw_features;

	SET_NETDEV_DEV(dev, &vdev->dev);

	err = register_netdev(dev);
	if (err) {
		pr_err("Cannot register net device, aborting\n");
		goto err_out_free_dev;
	}

	netdev_info(dev, "Sun LDOM vnet %pM\n", dev->dev_addr);

	list_add(&vp->list, &vnet_list);

	return vp;

err_out_free_dev:
	free_netdev(dev);

	return ERR_PTR(err);
}
static struct vnet *vnet_find_or_create(const u64 *local_mac,
					struct vio_dev *vdev)
{
	struct vnet *iter, *vp;

	mutex_lock(&vnet_list_mutex);
	vp = NULL;
	list_for_each_entry(iter, &vnet_list, list) {
		if (iter->local_mac == *local_mac) {
			vp = iter;
			break;
		}
	}
	if (!vp)
		vp = vnet_new(local_mac, vdev);
	mutex_unlock(&vnet_list_mutex);

	return vp;
}
static void vnet_cleanup(void)
{
	struct vnet *vp;
	struct net_device *dev;

	mutex_lock(&vnet_list_mutex);
	while (!list_empty(&vnet_list)) {
		vp = list_first_entry(&vnet_list, struct vnet, list);
		list_del(&vp->list);
		dev = vp->dev;
		/* vio_unregister_driver() should have cleaned up port_list */
		BUG_ON(!list_empty(&vp->port_list));
		unregister_netdev(dev);
		free_netdev(dev);
	}
	mutex_unlock(&vnet_list_mutex);
}
static const char *local_mac_prop = "local-mac-address";
static struct vnet *vnet_find_parent(struct mdesc_handle *hp,
				     u64 port_node,
				     struct vio_dev *vdev)
{
	const u64 *local_mac = NULL;
	u64 a;

	mdesc_for_each_arc(a, hp, port_node, MDESC_ARC_TYPE_BACK) {
		u64 target = mdesc_arc_target(hp, a);
		const char *name;

		name = mdesc_get_property(hp, target, "name", NULL);
		if (!name || strcmp(name, "network"))
			continue;

		local_mac = mdesc_get_property(hp, target,
					       local_mac_prop, NULL);
		if (local_mac)
			break;
	}
	if (!local_mac)
		return ERR_PTR(-ENODEV);

	return vnet_find_or_create(local_mac, vdev);
}
static struct ldc_channel_config vnet_ldc_cfg = {
	.event		= vnet_event,
	.mtu		= 64,
	.mode		= LDC_MODE_UNRELIABLE,
};

static struct vio_driver_ops vnet_vio_ops = {
	.send_attr		= vnet_send_attr,
	.handle_attr		= vnet_handle_attr,
	.handshake_complete	= vnet_handshake_complete,
};
static void print_version(void)
{
	printk_once(KERN_INFO "%s", version);
}
const char *remote_macaddr_prop = "remote-mac-address";
static void
vnet_port_add_txq(struct vnet_port *port)
{
	struct vnet *vp = port->vp;
	int n;

	n = vp->nports++;
	n = n & (VNET_MAX_TXQS - 1);
	port->q_index = n;
	netif_tx_wake_queue(netdev_get_tx_queue(vp->dev, port->q_index));
}
static void
vnet_port_rm_txq(struct vnet_port *port)
{
	port->vp->nports--;
	netif_tx_stop_queue(netdev_get_tx_queue(port->vp->dev, port->q_index));
}
static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
	struct mdesc_handle *hp;
	struct vnet_port *port;
	unsigned long flags;
	struct vnet *vp;
	const u64 *rmac;
	int len, i, err, switch_port;

	print_version();

	hp = mdesc_grab();

	vp = vnet_find_parent(hp, vdev->mp, vdev);
	if (IS_ERR(vp)) {
		pr_err("Cannot find port parent vnet\n");
		err = PTR_ERR(vp);
		goto err_out_put_mdesc;
	}

	rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
	err = -ENODEV;
	if (!rmac) {
		pr_err("Port lacks %s property\n", remote_macaddr_prop);
		goto err_out_put_mdesc;
	}

	port = kzalloc(sizeof(*port), GFP_KERNEL);
	err = -ENOMEM;
	if (!port)
		goto err_out_put_mdesc;

	for (i = 0; i < ETH_ALEN; i++)
		port->raddr[i] = (*rmac >> (5 - i) * 8) & 0xff;

	port->vp = vp;

	err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK,
			      vnet_versions, ARRAY_SIZE(vnet_versions),
			      &vnet_vio_ops, vp->dev->name);
	if (err)
		goto err_out_free_port;

	err = vio_ldc_alloc(&port->vio, &vnet_ldc_cfg, port);
	if (err)
		goto err_out_free_port;

	netif_napi_add(port->vp->dev, &port->napi, vnet_poll, NAPI_POLL_WEIGHT);

	INIT_HLIST_NODE(&port->hash);
	INIT_LIST_HEAD(&port->list);

	switch_port = 0;
	if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL)
		switch_port = 1;
	port->switch_port = switch_port;
	port->tso = true;
	port->tsolen = 0;

	spin_lock_irqsave(&vp->lock, flags);
	if (switch_port)
		list_add_rcu(&port->list, &vp->port_list);
	else
		list_add_tail_rcu(&port->list, &vp->port_list);
	hlist_add_head_rcu(&port->hash,
			   &vp->port_hash[vnet_hashfn(port->raddr)]);
	vnet_port_add_txq(port);
	spin_unlock_irqrestore(&vp->lock, flags);

	dev_set_drvdata(&vdev->dev, port);

	pr_info("%s: PORT ( remote-mac %pM%s )\n",
		vp->dev->name, port->raddr, switch_port ? " switch-port" : "");

	setup_timer(&port->clean_timer, vnet_clean_timer_expire,
		    (unsigned long)port);

	napi_enable(&port->napi);
	vio_port_up(&port->vio);

	mdesc_release(hp);

	return 0;

err_out_free_port:
	kfree(port);

err_out_put_mdesc:
	mdesc_release(hp);
	return err;
}
static int vnet_port_remove(struct vio_dev *vdev)
{
	struct vnet_port *port = dev_get_drvdata(&vdev->dev);

	if (port) {
		del_timer_sync(&port->vio.timer);

		napi_disable(&port->napi);

		list_del_rcu(&port->list);
		hlist_del_rcu(&port->hash);

		synchronize_rcu();
		del_timer_sync(&port->clean_timer);
		vnet_port_rm_txq(port);
		netif_napi_del(&port->napi);
		vnet_port_free_tx_bufs(port);
		vio_ldc_free(&port->vio);

		dev_set_drvdata(&vdev->dev, NULL);

		kfree(port);
	}
	return 0;
}
static const struct vio_device_id vnet_port_match[] = {
	{
		.type = "vnet-port",
	},
	{},
};
MODULE_DEVICE_TABLE(vio, vnet_port_match);
static struct vio_driver vnet_port_driver = {
	.id_table	= vnet_port_match,
	.probe		= vnet_port_probe,
	.remove		= vnet_port_remove,
	.name		= "vnet_port",
};
static int __init vnet_init(void)
{
	return vio_register_driver(&vnet_port_driver);
}

static void __exit vnet_exit(void)
{
	vio_unregister_driver(&vnet_port_driver);
	vnet_cleanup();
}

module_init(vnet_init);
module_exit(vnet_exit);