/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 #include <linux/init.h>
23 #include <linux/atomic.h>
24 #include <linux/module.h>
25 #include <linux/highmem.h>
26 #include <linux/device.h>
28 #include <linux/delay.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/skbuff.h>
33 #include <linux/if_vlan.h>
35 #include <linux/slab.h>
37 #include <net/route.h>
39 #include <net/pkt_sched.h>
41 #include "hyperv_net.h"
43 #define RING_SIZE_MIN 64
44 #define LINKCHANGE_INT (2 * HZ)
46 static int ring_size
= 128;
47 module_param(ring_size
, int, S_IRUGO
);
48 MODULE_PARM_DESC(ring_size
, "Ring buffer size (# of pages)");
50 static const u32 default_msg
= NETIF_MSG_DRV
| NETIF_MSG_PROBE
|
51 NETIF_MSG_LINK
| NETIF_MSG_IFUP
|
52 NETIF_MSG_IFDOWN
| NETIF_MSG_RX_ERR
|
55 static int debug
= -1;
56 module_param(debug
, int, S_IRUGO
);
57 MODULE_PARM_DESC(debug
, "Debug level (0=none,...,16=all)");
59 static void do_set_multicast(struct work_struct
*w
)
61 struct net_device_context
*ndevctx
=
62 container_of(w
, struct net_device_context
, work
);
63 struct hv_device
*device_obj
= ndevctx
->device_ctx
;
64 struct net_device
*ndev
= hv_get_drvdata(device_obj
);
65 struct netvsc_device
*nvdev
= ndevctx
->nvdev
;
66 struct rndis_device
*rdev
;
71 rdev
= nvdev
->extension
;
75 if (ndev
->flags
& IFF_PROMISC
)
76 rndis_filter_set_packet_filter(rdev
,
77 NDIS_PACKET_TYPE_PROMISCUOUS
);
79 rndis_filter_set_packet_filter(rdev
,
80 NDIS_PACKET_TYPE_BROADCAST
|
81 NDIS_PACKET_TYPE_ALL_MULTICAST
|
82 NDIS_PACKET_TYPE_DIRECTED
);
85 static void netvsc_set_multicast_list(struct net_device
*net
)
87 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
89 schedule_work(&net_device_ctx
->work
);
92 static int netvsc_open(struct net_device
*net
)
94 struct netvsc_device
*nvdev
= net_device_to_netvsc_device(net
);
95 struct rndis_device
*rdev
;
98 netif_carrier_off(net
);
100 /* Open up the device */
101 ret
= rndis_filter_open(nvdev
);
103 netdev_err(net
, "unable to open device (ret %d).\n", ret
);
107 netif_tx_wake_all_queues(net
);
109 rdev
= nvdev
->extension
;
110 if (!rdev
->link_state
)
111 netif_carrier_on(net
);
116 static int netvsc_close(struct net_device
*net
)
118 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
119 struct netvsc_device
*nvdev
= net_device_ctx
->nvdev
;
121 u32 aread
, awrite
, i
, msec
= 10, retry
= 0, retry_max
= 20;
122 struct vmbus_channel
*chn
;
124 netif_tx_disable(net
);
126 /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
127 cancel_work_sync(&net_device_ctx
->work
);
128 ret
= rndis_filter_close(nvdev
);
130 netdev_err(net
, "unable to close device (ret %d).\n", ret
);
134 /* Ensure pending bytes in ring are read */
137 for (i
= 0; i
< nvdev
->num_chn
; i
++) {
138 chn
= nvdev
->chan_table
[i
].channel
;
142 hv_get_ringbuffer_availbytes(&chn
->inbound
, &aread
,
148 hv_get_ringbuffer_availbytes(&chn
->outbound
, &aread
,
156 if (retry
> retry_max
|| aread
== 0)
166 netdev_err(net
, "Ring buffer not empty after closing rndis\n");
173 static void *init_ppi_data(struct rndis_message
*msg
, u32 ppi_size
,
176 struct rndis_packet
*rndis_pkt
;
177 struct rndis_per_packet_info
*ppi
;
179 rndis_pkt
= &msg
->msg
.pkt
;
180 rndis_pkt
->data_offset
+= ppi_size
;
182 ppi
= (struct rndis_per_packet_info
*)((void *)rndis_pkt
+
183 rndis_pkt
->per_pkt_info_offset
+ rndis_pkt
->per_pkt_info_len
);
185 ppi
->size
= ppi_size
;
186 ppi
->type
= pkt_type
;
187 ppi
->ppi_offset
= sizeof(struct rndis_per_packet_info
);
189 rndis_pkt
->per_pkt_info_len
+= ppi_size
;
195 * Select queue for transmit.
197 * If a valid queue has already been assigned, then use that.
198 * Otherwise compute tx queue based on hash and the send table.
200 * This is basically similar to default (__netdev_pick_tx) with the added step
201 * of using the host send_table when no other queue has been assigned.
203 * TODO support XPS - but get_xps_queue not exported
205 static u16
netvsc_select_queue(struct net_device
*ndev
, struct sk_buff
*skb
,
206 void *accel_priv
, select_queue_fallback_t fallback
)
208 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
209 unsigned int num_tx_queues
= ndev
->real_num_tx_queues
;
210 struct sock
*sk
= skb
->sk
;
211 int q_idx
= sk_tx_queue_get(sk
);
213 if (q_idx
< 0 || skb
->ooo_okay
|| q_idx
>= num_tx_queues
) {
214 u16 hash
= __skb_tx_hash(ndev
, skb
, VRSS_SEND_TAB_SIZE
);
217 new_idx
= net_device_ctx
->tx_send_table
[hash
] % num_tx_queues
;
219 if (q_idx
!= new_idx
&& sk
&&
220 sk_fullsock(sk
) && rcu_access_pointer(sk
->sk_dst_cache
))
221 sk_tx_queue_set(sk
, new_idx
);
229 static u32
fill_pg_buf(struct page
*page
, u32 offset
, u32 len
,
230 struct hv_page_buffer
*pb
)
234 /* Deal with compund pages by ignoring unused part
237 page
+= (offset
>> PAGE_SHIFT
);
238 offset
&= ~PAGE_MASK
;
243 bytes
= PAGE_SIZE
- offset
;
246 pb
[j
].pfn
= page_to_pfn(page
);
247 pb
[j
].offset
= offset
;
253 if (offset
== PAGE_SIZE
&& len
) {
263 static u32
init_page_array(void *hdr
, u32 len
, struct sk_buff
*skb
,
264 struct hv_netvsc_packet
*packet
,
265 struct hv_page_buffer
**page_buf
)
267 struct hv_page_buffer
*pb
= *page_buf
;
269 char *data
= skb
->data
;
270 int frags
= skb_shinfo(skb
)->nr_frags
;
273 /* The packet is laid out thus:
274 * 1. hdr: RNDIS header and PPI
276 * 3. skb fragment data
279 slots_used
+= fill_pg_buf(virt_to_page(hdr
),
281 len
, &pb
[slots_used
]);
283 packet
->rmsg_size
= len
;
284 packet
->rmsg_pgcnt
= slots_used
;
286 slots_used
+= fill_pg_buf(virt_to_page(data
),
287 offset_in_page(data
),
288 skb_headlen(skb
), &pb
[slots_used
]);
290 for (i
= 0; i
< frags
; i
++) {
291 skb_frag_t
*frag
= skb_shinfo(skb
)->frags
+ i
;
293 slots_used
+= fill_pg_buf(skb_frag_page(frag
),
295 skb_frag_size(frag
), &pb
[slots_used
]);
300 static int count_skb_frag_slots(struct sk_buff
*skb
)
302 int i
, frags
= skb_shinfo(skb
)->nr_frags
;
305 for (i
= 0; i
< frags
; i
++) {
306 skb_frag_t
*frag
= skb_shinfo(skb
)->frags
+ i
;
307 unsigned long size
= skb_frag_size(frag
);
308 unsigned long offset
= frag
->page_offset
;
310 /* Skip unused frames from start of page */
311 offset
&= ~PAGE_MASK
;
312 pages
+= PFN_UP(offset
+ size
);
317 static int netvsc_get_slots(struct sk_buff
*skb
)
319 char *data
= skb
->data
;
320 unsigned int offset
= offset_in_page(data
);
321 unsigned int len
= skb_headlen(skb
);
325 slots
= DIV_ROUND_UP(offset
+ len
, PAGE_SIZE
);
326 frag_slots
= count_skb_frag_slots(skb
);
327 return slots
+ frag_slots
;
330 static u32
net_checksum_info(struct sk_buff
*skb
)
332 if (skb
->protocol
== htons(ETH_P_IP
)) {
333 struct iphdr
*ip
= ip_hdr(skb
);
335 if (ip
->protocol
== IPPROTO_TCP
)
336 return TRANSPORT_INFO_IPV4_TCP
;
337 else if (ip
->protocol
== IPPROTO_UDP
)
338 return TRANSPORT_INFO_IPV4_UDP
;
340 struct ipv6hdr
*ip6
= ipv6_hdr(skb
);
342 if (ip6
->nexthdr
== IPPROTO_TCP
)
343 return TRANSPORT_INFO_IPV6_TCP
;
344 else if (ipv6_hdr(skb
)->nexthdr
== IPPROTO_UDP
)
345 return TRANSPORT_INFO_IPV6_UDP
;
348 return TRANSPORT_INFO_NOT_IP
;
351 static int netvsc_start_xmit(struct sk_buff
*skb
, struct net_device
*net
)
353 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
354 struct hv_netvsc_packet
*packet
= NULL
;
356 unsigned int num_data_pgs
;
357 struct rndis_message
*rndis_msg
;
358 struct rndis_packet
*rndis_pkt
;
360 struct rndis_per_packet_info
*ppi
;
362 struct hv_page_buffer page_buf
[MAX_PAGE_BUFFER_COUNT
];
363 struct hv_page_buffer
*pb
= page_buf
;
365 /* We will atmost need two pages to describe the rndis
366 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
367 * of pages in a single packet. If skb is scattered around
368 * more pages we try linearizing it.
371 num_data_pgs
= netvsc_get_slots(skb
) + 2;
373 if (unlikely(num_data_pgs
> MAX_PAGE_BUFFER_COUNT
)) {
374 ++net_device_ctx
->eth_stats
.tx_scattered
;
376 if (skb_linearize(skb
))
379 num_data_pgs
= netvsc_get_slots(skb
) + 2;
380 if (num_data_pgs
> MAX_PAGE_BUFFER_COUNT
) {
381 ++net_device_ctx
->eth_stats
.tx_too_big
;
387 * Place the rndis header in the skb head room and
388 * the skb->cb will be used for hv_netvsc_packet
391 ret
= skb_cow_head(skb
, RNDIS_AND_PPI_SIZE
);
395 /* Use the skb control buffer for building up the packet */
396 BUILD_BUG_ON(sizeof(struct hv_netvsc_packet
) >
397 FIELD_SIZEOF(struct sk_buff
, cb
));
398 packet
= (struct hv_netvsc_packet
*)skb
->cb
;
400 packet
->q_idx
= skb_get_queue_mapping(skb
);
402 packet
->total_data_buflen
= skb
->len
;
403 packet
->total_bytes
= skb
->len
;
404 packet
->total_packets
= 1;
406 rndis_msg
= (struct rndis_message
*)skb
->head
;
408 memset(rndis_msg
, 0, RNDIS_AND_PPI_SIZE
);
410 /* Add the rndis header */
411 rndis_msg
->ndis_msg_type
= RNDIS_MSG_PACKET
;
412 rndis_msg
->msg_len
= packet
->total_data_buflen
;
413 rndis_pkt
= &rndis_msg
->msg
.pkt
;
414 rndis_pkt
->data_offset
= sizeof(struct rndis_packet
);
415 rndis_pkt
->data_len
= packet
->total_data_buflen
;
416 rndis_pkt
->per_pkt_info_offset
= sizeof(struct rndis_packet
);
418 rndis_msg_size
= RNDIS_MESSAGE_SIZE(struct rndis_packet
);
420 hash
= skb_get_hash_raw(skb
);
421 if (hash
!= 0 && net
->real_num_tx_queues
> 1) {
422 rndis_msg_size
+= NDIS_HASH_PPI_SIZE
;
423 ppi
= init_ppi_data(rndis_msg
, NDIS_HASH_PPI_SIZE
,
425 *(u32
*)((void *)ppi
+ ppi
->ppi_offset
) = hash
;
428 if (skb_vlan_tag_present(skb
)) {
429 struct ndis_pkt_8021q_info
*vlan
;
431 rndis_msg_size
+= NDIS_VLAN_PPI_SIZE
;
432 ppi
= init_ppi_data(rndis_msg
, NDIS_VLAN_PPI_SIZE
,
434 vlan
= (struct ndis_pkt_8021q_info
*)((void *)ppi
+
436 vlan
->vlanid
= skb
->vlan_tci
& VLAN_VID_MASK
;
437 vlan
->pri
= (skb
->vlan_tci
& VLAN_PRIO_MASK
) >>
441 if (skb_is_gso(skb
)) {
442 struct ndis_tcp_lso_info
*lso_info
;
444 rndis_msg_size
+= NDIS_LSO_PPI_SIZE
;
445 ppi
= init_ppi_data(rndis_msg
, NDIS_LSO_PPI_SIZE
,
446 TCP_LARGESEND_PKTINFO
);
448 lso_info
= (struct ndis_tcp_lso_info
*)((void *)ppi
+
451 lso_info
->lso_v2_transmit
.type
= NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE
;
452 if (skb
->protocol
== htons(ETH_P_IP
)) {
453 lso_info
->lso_v2_transmit
.ip_version
=
454 NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4
;
455 ip_hdr(skb
)->tot_len
= 0;
456 ip_hdr(skb
)->check
= 0;
457 tcp_hdr(skb
)->check
=
458 ~csum_tcpudp_magic(ip_hdr(skb
)->saddr
,
459 ip_hdr(skb
)->daddr
, 0, IPPROTO_TCP
, 0);
461 lso_info
->lso_v2_transmit
.ip_version
=
462 NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6
;
463 ipv6_hdr(skb
)->payload_len
= 0;
464 tcp_hdr(skb
)->check
=
465 ~csum_ipv6_magic(&ipv6_hdr(skb
)->saddr
,
466 &ipv6_hdr(skb
)->daddr
, 0, IPPROTO_TCP
, 0);
468 lso_info
->lso_v2_transmit
.tcp_header_offset
= skb_transport_offset(skb
);
469 lso_info
->lso_v2_transmit
.mss
= skb_shinfo(skb
)->gso_size
;
470 } else if (skb
->ip_summed
== CHECKSUM_PARTIAL
) {
471 if (net_checksum_info(skb
) & net_device_ctx
->tx_checksum_mask
) {
472 struct ndis_tcp_ip_checksum_info
*csum_info
;
474 rndis_msg_size
+= NDIS_CSUM_PPI_SIZE
;
475 ppi
= init_ppi_data(rndis_msg
, NDIS_CSUM_PPI_SIZE
,
476 TCPIP_CHKSUM_PKTINFO
);
478 csum_info
= (struct ndis_tcp_ip_checksum_info
*)((void *)ppi
+
481 csum_info
->transmit
.tcp_header_offset
= skb_transport_offset(skb
);
483 if (skb
->protocol
== htons(ETH_P_IP
)) {
484 csum_info
->transmit
.is_ipv4
= 1;
486 if (ip_hdr(skb
)->protocol
== IPPROTO_TCP
)
487 csum_info
->transmit
.tcp_checksum
= 1;
489 csum_info
->transmit
.udp_checksum
= 1;
491 csum_info
->transmit
.is_ipv6
= 1;
493 if (ipv6_hdr(skb
)->nexthdr
== IPPROTO_TCP
)
494 csum_info
->transmit
.tcp_checksum
= 1;
496 csum_info
->transmit
.udp_checksum
= 1;
499 /* Can't do offload of this type of checksum */
500 if (skb_checksum_help(skb
))
505 /* Start filling in the page buffers with the rndis hdr */
506 rndis_msg
->msg_len
+= rndis_msg_size
;
507 packet
->total_data_buflen
= rndis_msg
->msg_len
;
508 packet
->page_buf_cnt
= init_page_array(rndis_msg
, rndis_msg_size
,
511 /* timestamp packet in software */
512 skb_tx_timestamp(skb
);
513 ret
= netvsc_send(net_device_ctx
->device_ctx
, packet
,
514 rndis_msg
, &pb
, skb
);
515 if (likely(ret
== 0))
518 if (ret
== -EAGAIN
) {
519 ++net_device_ctx
->eth_stats
.tx_busy
;
520 return NETDEV_TX_BUSY
;
524 ++net_device_ctx
->eth_stats
.tx_no_space
;
527 dev_kfree_skb_any(skb
);
528 net
->stats
.tx_dropped
++;
533 ++net_device_ctx
->eth_stats
.tx_no_memory
;
537 * netvsc_linkstatus_callback - Link up/down notification
539 void netvsc_linkstatus_callback(struct hv_device
*device_obj
,
540 struct rndis_message
*resp
)
542 struct rndis_indicate_status
*indicate
= &resp
->msg
.indicate_status
;
543 struct net_device
*net
;
544 struct net_device_context
*ndev_ctx
;
545 struct netvsc_reconfig
*event
;
548 net
= hv_get_drvdata(device_obj
);
553 ndev_ctx
= netdev_priv(net
);
555 /* Update the physical link speed when changing to another vSwitch */
556 if (indicate
->status
== RNDIS_STATUS_LINK_SPEED_CHANGE
) {
559 speed
= *(u32
*)((void *)indicate
+ indicate
->
560 status_buf_offset
) / 10000;
561 ndev_ctx
->speed
= speed
;
565 /* Handle these link change statuses below */
566 if (indicate
->status
!= RNDIS_STATUS_NETWORK_CHANGE
&&
567 indicate
->status
!= RNDIS_STATUS_MEDIA_CONNECT
&&
568 indicate
->status
!= RNDIS_STATUS_MEDIA_DISCONNECT
)
571 if (net
->reg_state
!= NETREG_REGISTERED
)
574 event
= kzalloc(sizeof(*event
), GFP_ATOMIC
);
577 event
->event
= indicate
->status
;
579 spin_lock_irqsave(&ndev_ctx
->lock
, flags
);
580 list_add_tail(&event
->list
, &ndev_ctx
->reconfig_events
);
581 spin_unlock_irqrestore(&ndev_ctx
->lock
, flags
);
583 schedule_delayed_work(&ndev_ctx
->dwork
, 0);
586 static struct sk_buff
*netvsc_alloc_recv_skb(struct net_device
*net
,
587 struct napi_struct
*napi
,
588 const struct ndis_tcp_ip_checksum_info
*csum_info
,
589 const struct ndis_pkt_8021q_info
*vlan
,
590 void *data
, u32 buflen
)
594 skb
= napi_alloc_skb(napi
, buflen
);
599 * Copy to skb. This copy is needed here since the memory pointed by
600 * hv_netvsc_packet cannot be deallocated
602 memcpy(skb_put(skb
, buflen
), data
, buflen
);
604 skb
->protocol
= eth_type_trans(skb
, net
);
606 /* skb is already created with CHECKSUM_NONE */
607 skb_checksum_none_assert(skb
);
610 * In Linux, the IP checksum is always checked.
611 * Do L4 checksum offload if enabled and present.
613 if (csum_info
&& (net
->features
& NETIF_F_RXCSUM
)) {
614 if (csum_info
->receive
.tcp_checksum_succeeded
||
615 csum_info
->receive
.udp_checksum_succeeded
)
616 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
620 u16 vlan_tci
= vlan
->vlanid
| (vlan
->pri
<< VLAN_PRIO_SHIFT
);
622 __vlan_hwaccel_put_tag(skb
, htons(ETH_P_8021Q
),
630 * netvsc_recv_callback - Callback when we receive a packet from the
631 * "wire" on the specified device.
633 int netvsc_recv_callback(struct net_device
*net
,
634 struct vmbus_channel
*channel
,
636 const struct ndis_tcp_ip_checksum_info
*csum_info
,
637 const struct ndis_pkt_8021q_info
*vlan
)
639 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
640 struct netvsc_device
*net_device
= net_device_ctx
->nvdev
;
641 u16 q_idx
= channel
->offermsg
.offer
.sub_channel_index
;
642 struct netvsc_channel
*nvchan
= &net_device
->chan_table
[q_idx
];
643 struct net_device
*vf_netdev
;
645 struct netvsc_stats
*rx_stats
;
647 if (net
->reg_state
!= NETREG_REGISTERED
)
648 return NVSP_STAT_FAIL
;
651 * If necessary, inject this packet into the VF interface.
652 * On Hyper-V, multicast and brodcast packets are only delivered
653 * to the synthetic interface (after subjecting these to
654 * policy filters on the host). Deliver these via the VF
655 * interface in the guest.
658 vf_netdev
= rcu_dereference(net_device_ctx
->vf_netdev
);
659 if (vf_netdev
&& (vf_netdev
->flags
& IFF_UP
))
662 /* Allocate a skb - TODO direct I/O to pages? */
663 skb
= netvsc_alloc_recv_skb(net
, &nvchan
->napi
,
664 csum_info
, vlan
, data
, len
);
665 if (unlikely(!skb
)) {
666 ++net
->stats
.rx_dropped
;
668 return NVSP_STAT_FAIL
;
671 if (net
!= vf_netdev
)
672 skb_record_rx_queue(skb
, q_idx
);
675 * Even if injecting the packet, record the statistics
676 * on the synthetic device because modifying the VF device
677 * statistics will not work correctly.
679 rx_stats
= &nvchan
->rx_stats
;
680 u64_stats_update_begin(&rx_stats
->syncp
);
682 rx_stats
->bytes
+= len
;
684 if (skb
->pkt_type
== PACKET_BROADCAST
)
685 ++rx_stats
->broadcast
;
686 else if (skb
->pkt_type
== PACKET_MULTICAST
)
687 ++rx_stats
->multicast
;
688 u64_stats_update_end(&rx_stats
->syncp
);
690 napi_gro_receive(&nvchan
->napi
, skb
);
696 static void netvsc_get_drvinfo(struct net_device
*net
,
697 struct ethtool_drvinfo
*info
)
699 strlcpy(info
->driver
, KBUILD_MODNAME
, sizeof(info
->driver
));
700 strlcpy(info
->fw_version
, "N/A", sizeof(info
->fw_version
));
703 static void netvsc_get_channels(struct net_device
*net
,
704 struct ethtool_channels
*channel
)
706 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
707 struct netvsc_device
*nvdev
= net_device_ctx
->nvdev
;
710 channel
->max_combined
= nvdev
->max_chn
;
711 channel
->combined_count
= nvdev
->num_chn
;
715 static int netvsc_set_queues(struct net_device
*net
, struct hv_device
*dev
,
718 struct netvsc_device_info device_info
;
721 memset(&device_info
, 0, sizeof(device_info
));
722 device_info
.num_chn
= num_chn
;
723 device_info
.ring_size
= ring_size
;
724 device_info
.max_num_vrss_chns
= num_chn
;
726 ret
= rndis_filter_device_add(dev
, &device_info
);
730 ret
= netif_set_real_num_tx_queues(net
, num_chn
);
734 ret
= netif_set_real_num_rx_queues(net
, num_chn
);
739 static int netvsc_set_channels(struct net_device
*net
,
740 struct ethtool_channels
*channels
)
742 struct net_device_context
*net_device_ctx
= netdev_priv(net
);
743 struct hv_device
*dev
= net_device_ctx
->device_ctx
;
744 struct netvsc_device
*nvdev
= net_device_ctx
->nvdev
;
745 unsigned int count
= channels
->combined_count
;
748 /* We do not support separate count for rx, tx, or other */
750 channels
->rx_count
|| channels
->tx_count
|| channels
->other_count
)
753 if (count
> net
->num_tx_queues
|| count
> net
->num_rx_queues
)
756 if (net_device_ctx
->start_remove
|| !nvdev
|| nvdev
->destroy
)
759 if (nvdev
->nvsp_version
< NVSP_PROTOCOL_VERSION_5
)
762 if (count
> nvdev
->max_chn
)
765 ret
= netvsc_close(net
);
769 net_device_ctx
->start_remove
= true;
770 rndis_filter_device_remove(dev
, nvdev
);
772 ret
= netvsc_set_queues(net
, dev
, count
);
774 nvdev
->num_chn
= count
;
776 netvsc_set_queues(net
, dev
, nvdev
->num_chn
);
779 net_device_ctx
->start_remove
= false;
781 /* We may have missed link change notifications */
782 schedule_delayed_work(&net_device_ctx
->dwork
, 0);
788 netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings
*cmd
)
790 struct ethtool_link_ksettings diff1
= *cmd
;
791 struct ethtool_link_ksettings diff2
= {};
793 diff1
.base
.speed
= 0;
794 diff1
.base
.duplex
= 0;
795 /* advertising and cmd are usually set */
796 ethtool_link_ksettings_zero_link_mode(&diff1
, advertising
);
798 /* We set port to PORT_OTHER */
799 diff2
.base
.port
= PORT_OTHER
;
801 return !memcmp(&diff1
, &diff2
, sizeof(diff1
));
804 static void netvsc_init_settings(struct net_device
*dev
)
806 struct net_device_context
*ndc
= netdev_priv(dev
);
808 ndc
->speed
= SPEED_UNKNOWN
;
809 ndc
->duplex
= DUPLEX_UNKNOWN
;
812 static int netvsc_get_link_ksettings(struct net_device
*dev
,
813 struct ethtool_link_ksettings
*cmd
)
815 struct net_device_context
*ndc
= netdev_priv(dev
);
817 cmd
->base
.speed
= ndc
->speed
;
818 cmd
->base
.duplex
= ndc
->duplex
;
819 cmd
->base
.port
= PORT_OTHER
;
824 static int netvsc_set_link_ksettings(struct net_device
*dev
,
825 const struct ethtool_link_ksettings
*cmd
)
827 struct net_device_context
*ndc
= netdev_priv(dev
);
830 speed
= cmd
->base
.speed
;
831 if (!ethtool_validate_speed(speed
) ||
832 !ethtool_validate_duplex(cmd
->base
.duplex
) ||
833 !netvsc_validate_ethtool_ss_cmd(cmd
))
837 ndc
->duplex
= cmd
->base
.duplex
;
842 static int netvsc_change_mtu(struct net_device
*ndev
, int mtu
)
844 struct net_device_context
*ndevctx
= netdev_priv(ndev
);
845 struct netvsc_device
*nvdev
= ndevctx
->nvdev
;
846 struct hv_device
*hdev
= ndevctx
->device_ctx
;
847 struct netvsc_device_info device_info
;
850 if (ndevctx
->start_remove
|| !nvdev
|| nvdev
->destroy
)
853 ret
= netvsc_close(ndev
);
857 memset(&device_info
, 0, sizeof(device_info
));
858 device_info
.ring_size
= ring_size
;
859 device_info
.num_chn
= nvdev
->num_chn
;
860 device_info
.max_num_vrss_chns
= nvdev
->num_chn
;
862 ndevctx
->start_remove
= true;
863 rndis_filter_device_remove(hdev
, nvdev
);
865 /* 'nvdev' has been freed in rndis_filter_device_remove() ->
866 * netvsc_device_remove () -> free_netvsc_device().
867 * We mustn't access it before it's re-created in
868 * rndis_filter_device_add() -> netvsc_device_add().
873 rndis_filter_device_add(hdev
, &device_info
);
877 ndevctx
->start_remove
= false;
879 /* We may have missed link change notifications */
880 schedule_delayed_work(&ndevctx
->dwork
, 0);
885 static void netvsc_get_stats64(struct net_device
*net
,
886 struct rtnl_link_stats64
*t
)
888 struct net_device_context
*ndev_ctx
= netdev_priv(net
);
889 struct netvsc_device
*nvdev
= ndev_ctx
->nvdev
;
895 for (i
= 0; i
< nvdev
->num_chn
; i
++) {
896 const struct netvsc_channel
*nvchan
= &nvdev
->chan_table
[i
];
897 const struct netvsc_stats
*stats
;
898 u64 packets
, bytes
, multicast
;
901 stats
= &nvchan
->tx_stats
;
903 start
= u64_stats_fetch_begin_irq(&stats
->syncp
);
904 packets
= stats
->packets
;
905 bytes
= stats
->bytes
;
906 } while (u64_stats_fetch_retry_irq(&stats
->syncp
, start
));
908 t
->tx_bytes
+= bytes
;
909 t
->tx_packets
+= packets
;
911 stats
= &nvchan
->rx_stats
;
913 start
= u64_stats_fetch_begin_irq(&stats
->syncp
);
914 packets
= stats
->packets
;
915 bytes
= stats
->bytes
;
916 multicast
= stats
->multicast
+ stats
->broadcast
;
917 } while (u64_stats_fetch_retry_irq(&stats
->syncp
, start
));
919 t
->rx_bytes
+= bytes
;
920 t
->rx_packets
+= packets
;
921 t
->multicast
+= multicast
;
924 t
->tx_dropped
= net
->stats
.tx_dropped
;
925 t
->tx_errors
= net
->stats
.tx_errors
;
927 t
->rx_dropped
= net
->stats
.rx_dropped
;
928 t
->rx_errors
= net
->stats
.rx_errors
;
931 static int netvsc_set_mac_addr(struct net_device
*ndev
, void *p
)
933 struct sockaddr
*addr
= p
;
934 char save_adr
[ETH_ALEN
];
935 unsigned char save_aatype
;
938 memcpy(save_adr
, ndev
->dev_addr
, ETH_ALEN
);
939 save_aatype
= ndev
->addr_assign_type
;
941 err
= eth_mac_addr(ndev
, p
);
945 err
= rndis_filter_set_device_mac(ndev
, addr
->sa_data
);
947 /* roll back to saved MAC */
948 memcpy(ndev
->dev_addr
, save_adr
, ETH_ALEN
);
949 ndev
->addr_assign_type
= save_aatype
;
955 static const struct {
956 char name
[ETH_GSTRING_LEN
];
959 { "tx_scattered", offsetof(struct netvsc_ethtool_stats
, tx_scattered
) },
960 { "tx_no_memory", offsetof(struct netvsc_ethtool_stats
, tx_no_memory
) },
961 { "tx_no_space", offsetof(struct netvsc_ethtool_stats
, tx_no_space
) },
962 { "tx_too_big", offsetof(struct netvsc_ethtool_stats
, tx_too_big
) },
963 { "tx_busy", offsetof(struct netvsc_ethtool_stats
, tx_busy
) },
966 #define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats)
968 /* 4 statistics per queue (rx/tx packets/bytes) */
969 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
971 static int netvsc_get_sset_count(struct net_device
*dev
, int string_set
)
973 struct net_device_context
*ndc
= netdev_priv(dev
);
974 struct netvsc_device
*nvdev
= ndc
->nvdev
;
976 switch (string_set
) {
978 return NETVSC_GLOBAL_STATS_LEN
+ NETVSC_QUEUE_STATS_LEN(nvdev
);
984 static void netvsc_get_ethtool_stats(struct net_device
*dev
,
985 struct ethtool_stats
*stats
, u64
*data
)
987 struct net_device_context
*ndc
= netdev_priv(dev
);
988 struct netvsc_device
*nvdev
= ndc
->nvdev
;
989 const void *nds
= &ndc
->eth_stats
;
990 const struct netvsc_stats
*qstats
;
995 for (i
= 0; i
< NETVSC_GLOBAL_STATS_LEN
; i
++)
996 data
[i
] = *(unsigned long *)(nds
+ netvsc_stats
[i
].offset
);
998 for (j
= 0; j
< nvdev
->num_chn
; j
++) {
999 qstats
= &nvdev
->chan_table
[j
].tx_stats
;
1002 start
= u64_stats_fetch_begin_irq(&qstats
->syncp
);
1003 packets
= qstats
->packets
;
1004 bytes
= qstats
->bytes
;
1005 } while (u64_stats_fetch_retry_irq(&qstats
->syncp
, start
));
1006 data
[i
++] = packets
;
1009 qstats
= &nvdev
->chan_table
[j
].rx_stats
;
1011 start
= u64_stats_fetch_begin_irq(&qstats
->syncp
);
1012 packets
= qstats
->packets
;
1013 bytes
= qstats
->bytes
;
1014 } while (u64_stats_fetch_retry_irq(&qstats
->syncp
, start
));
1015 data
[i
++] = packets
;
1020 static void netvsc_get_strings(struct net_device
*dev
, u32 stringset
, u8
*data
)
1022 struct net_device_context
*ndc
= netdev_priv(dev
);
1023 struct netvsc_device
*nvdev
= ndc
->nvdev
;
1027 switch (stringset
) {
1029 for (i
= 0; i
< ARRAY_SIZE(netvsc_stats
); i
++)
1030 memcpy(p
+ i
* ETH_GSTRING_LEN
,
1031 netvsc_stats
[i
].name
, ETH_GSTRING_LEN
);
1033 p
+= i
* ETH_GSTRING_LEN
;
1034 for (i
= 0; i
< nvdev
->num_chn
; i
++) {
1035 sprintf(p
, "tx_queue_%u_packets", i
);
1036 p
+= ETH_GSTRING_LEN
;
1037 sprintf(p
, "tx_queue_%u_bytes", i
);
1038 p
+= ETH_GSTRING_LEN
;
1039 sprintf(p
, "rx_queue_%u_packets", i
);
1040 p
+= ETH_GSTRING_LEN
;
1041 sprintf(p
, "rx_queue_%u_bytes", i
);
1042 p
+= ETH_GSTRING_LEN
;
1050 netvsc_get_rss_hash_opts(struct netvsc_device
*nvdev
,
1051 struct ethtool_rxnfc
*info
)
1053 info
->data
= RXH_IP_SRC
| RXH_IP_DST
;
1055 switch (info
->flow_type
) {
1058 info
->data
|= RXH_L4_B_0_1
| RXH_L4_B_2_3
;
1074 netvsc_get_rxnfc(struct net_device
*dev
, struct ethtool_rxnfc
*info
,
1077 struct net_device_context
*ndc
= netdev_priv(dev
);
1078 struct netvsc_device
*nvdev
= ndc
->nvdev
;
1080 switch (info
->cmd
) {
1081 case ETHTOOL_GRXRINGS
:
1082 info
->data
= nvdev
->num_chn
;
1086 return netvsc_get_rss_hash_opts(nvdev
, info
);
#ifdef CONFIG_NET_POLL_CONTROLLER
/* ndo_poll_controller handler; intentionally empty. */
static void netvsc_poll_controller(struct net_device *net)
{
	/* As netvsc_start_xmit() works synchronous we don't have to
	 * trigger anything here.
	 */
}
#endif
1100 static u32
netvsc_get_rxfh_key_size(struct net_device
*dev
)
1102 return NETVSC_HASH_KEYLEN
;
1105 static u32
netvsc_rss_indir_size(struct net_device
*dev
)
1110 static int netvsc_get_rxfh(struct net_device
*dev
, u32
*indir
, u8
*key
,
1113 struct net_device_context
*ndc
= netdev_priv(dev
);
1114 struct netvsc_device
*ndev
= ndc
->nvdev
;
1115 struct rndis_device
*rndis_dev
= ndev
->extension
;
1119 *hfunc
= ETH_RSS_HASH_TOP
; /* Toeplitz */
1122 for (i
= 0; i
< ITAB_NUM
; i
++)
1123 indir
[i
] = rndis_dev
->ind_table
[i
];
1127 memcpy(key
, rndis_dev
->rss_key
, NETVSC_HASH_KEYLEN
);
1132 static int netvsc_set_rxfh(struct net_device
*dev
, const u32
*indir
,
1133 const u8
*key
, const u8 hfunc
)
1135 struct net_device_context
*ndc
= netdev_priv(dev
);
1136 struct netvsc_device
*ndev
= ndc
->nvdev
;
1137 struct rndis_device
*rndis_dev
= ndev
->extension
;
1140 if (hfunc
!= ETH_RSS_HASH_NO_CHANGE
&& hfunc
!= ETH_RSS_HASH_TOP
)
1144 for (i
= 0; i
< ITAB_NUM
; i
++)
1145 if (indir
[i
] >= dev
->num_rx_queues
)
1148 for (i
= 0; i
< ITAB_NUM
; i
++)
1149 rndis_dev
->ind_table
[i
] = indir
[i
];
1156 key
= rndis_dev
->rss_key
;
1159 return rndis_filter_set_rss_param(rndis_dev
, key
, ndev
->num_chn
);
1162 static const struct ethtool_ops ethtool_ops
= {
1163 .get_drvinfo
= netvsc_get_drvinfo
,
1164 .get_link
= ethtool_op_get_link
,
1165 .get_ethtool_stats
= netvsc_get_ethtool_stats
,
1166 .get_sset_count
= netvsc_get_sset_count
,
1167 .get_strings
= netvsc_get_strings
,
1168 .get_channels
= netvsc_get_channels
,
1169 .set_channels
= netvsc_set_channels
,
1170 .get_ts_info
= ethtool_op_get_ts_info
,
1171 .get_rxnfc
= netvsc_get_rxnfc
,
1172 .get_rxfh_key_size
= netvsc_get_rxfh_key_size
,
1173 .get_rxfh_indir_size
= netvsc_rss_indir_size
,
1174 .get_rxfh
= netvsc_get_rxfh
,
1175 .set_rxfh
= netvsc_set_rxfh
,
1176 .get_link_ksettings
= netvsc_get_link_ksettings
,
1177 .set_link_ksettings
= netvsc_set_link_ksettings
,
1180 static const struct net_device_ops device_ops
= {
1181 .ndo_open
= netvsc_open
,
1182 .ndo_stop
= netvsc_close
,
1183 .ndo_start_xmit
= netvsc_start_xmit
,
1184 .ndo_set_rx_mode
= netvsc_set_multicast_list
,
1185 .ndo_change_mtu
= netvsc_change_mtu
,
1186 .ndo_validate_addr
= eth_validate_addr
,
1187 .ndo_set_mac_address
= netvsc_set_mac_addr
,
1188 .ndo_select_queue
= netvsc_select_queue
,
1189 .ndo_get_stats64
= netvsc_get_stats64
,
1190 #ifdef CONFIG_NET_POLL_CONTROLLER
1191 .ndo_poll_controller
= netvsc_poll_controller
,
1196 * Handle link status changes. For RNDIS_STATUS_NETWORK_CHANGE emulate link
1197 * down/up sequence. In case of RNDIS_STATUS_MEDIA_CONNECT when carrier is
1198 * present send GARP packet to network peers with netif_notify_peers().
1200 static void netvsc_link_change(struct work_struct
*w
)
1202 struct net_device_context
*ndev_ctx
=
1203 container_of(w
, struct net_device_context
, dwork
.work
);
1204 struct hv_device
*device_obj
= ndev_ctx
->device_ctx
;
1205 struct net_device
*net
= hv_get_drvdata(device_obj
);
1206 struct netvsc_device
*net_device
;
1207 struct rndis_device
*rdev
;
1208 struct netvsc_reconfig
*event
= NULL
;
1209 bool notify
= false, reschedule
= false;
1210 unsigned long flags
, next_reconfig
, delay
;
1213 if (ndev_ctx
->start_remove
)
1216 net_device
= ndev_ctx
->nvdev
;
1217 rdev
= net_device
->extension
;
1219 next_reconfig
= ndev_ctx
->last_reconfig
+ LINKCHANGE_INT
;
1220 if (time_is_after_jiffies(next_reconfig
)) {
1221 /* link_watch only sends one notification with current state
1222 * per second, avoid doing reconfig more frequently. Handle
1225 delay
= next_reconfig
- jiffies
;
1226 delay
= delay
< LINKCHANGE_INT
? delay
: LINKCHANGE_INT
;
1227 schedule_delayed_work(&ndev_ctx
->dwork
, delay
);
1230 ndev_ctx
->last_reconfig
= jiffies
;
1232 spin_lock_irqsave(&ndev_ctx
->lock
, flags
);
1233 if (!list_empty(&ndev_ctx
->reconfig_events
)) {
1234 event
= list_first_entry(&ndev_ctx
->reconfig_events
,
1235 struct netvsc_reconfig
, list
);
1236 list_del(&event
->list
);
1237 reschedule
= !list_empty(&ndev_ctx
->reconfig_events
);
1239 spin_unlock_irqrestore(&ndev_ctx
->lock
, flags
);
1244 switch (event
->event
) {
1245 /* Only the following events are possible due to the check in
1246 * netvsc_linkstatus_callback()
1248 case RNDIS_STATUS_MEDIA_CONNECT
:
1249 if (rdev
->link_state
) {
1250 rdev
->link_state
= false;
1251 netif_carrier_on(net
);
1252 netif_tx_wake_all_queues(net
);
1258 case RNDIS_STATUS_MEDIA_DISCONNECT
:
1259 if (!rdev
->link_state
) {
1260 rdev
->link_state
= true;
1261 netif_carrier_off(net
);
1262 netif_tx_stop_all_queues(net
);
1266 case RNDIS_STATUS_NETWORK_CHANGE
:
1267 /* Only makes sense if carrier is present */
1268 if (!rdev
->link_state
) {
1269 rdev
->link_state
= true;
1270 netif_carrier_off(net
);
1271 netif_tx_stop_all_queues(net
);
1272 event
->event
= RNDIS_STATUS_MEDIA_CONNECT
;
1273 spin_lock_irqsave(&ndev_ctx
->lock
, flags
);
1274 list_add(&event
->list
, &ndev_ctx
->reconfig_events
);
1275 spin_unlock_irqrestore(&ndev_ctx
->lock
, flags
);
1284 netdev_notify_peers(net
);
1286 /* link_watch only sends one notification with current state per
1287 * second, handle next reconfig event in 2 seconds.
1290 schedule_delayed_work(&ndev_ctx
->dwork
, LINKCHANGE_INT
);
1298 static struct net_device
*get_netvsc_bymac(const u8
*mac
)
1300 struct net_device
*dev
;
1304 for_each_netdev(&init_net
, dev
) {
1305 if (dev
->netdev_ops
!= &device_ops
)
1306 continue; /* not a netvsc device */
1308 if (ether_addr_equal(mac
, dev
->perm_addr
))
1315 static struct net_device
*get_netvsc_byref(struct net_device
*vf_netdev
)
1317 struct net_device
*dev
;
1321 for_each_netdev(&init_net
, dev
) {
1322 struct net_device_context
*net_device_ctx
;
1324 if (dev
->netdev_ops
!= &device_ops
)
1325 continue; /* not a netvsc device */
1327 net_device_ctx
= netdev_priv(dev
);
1328 if (net_device_ctx
->nvdev
== NULL
)
1329 continue; /* device is removed */
1331 if (rtnl_dereference(net_device_ctx
->vf_netdev
) == vf_netdev
)
1332 return dev
; /* a match */
1338 static int netvsc_register_vf(struct net_device
*vf_netdev
)
1340 struct net_device
*ndev
;
1341 struct net_device_context
*net_device_ctx
;
1342 struct netvsc_device
*netvsc_dev
;
1344 if (vf_netdev
->addr_len
!= ETH_ALEN
)
1348 * We will use the MAC address to locate the synthetic interface to
1349 * associate with the VF interface. If we don't find a matching
1350 * synthetic interface, move on.
1352 ndev
= get_netvsc_bymac(vf_netdev
->perm_addr
);
1356 net_device_ctx
= netdev_priv(ndev
);
1357 netvsc_dev
= net_device_ctx
->nvdev
;
1358 if (!netvsc_dev
|| rtnl_dereference(net_device_ctx
->vf_netdev
))
1361 netdev_info(ndev
, "VF registering: %s\n", vf_netdev
->name
);
1363 * Take a reference on the module.
1365 try_module_get(THIS_MODULE
);
1367 dev_hold(vf_netdev
);
1368 rcu_assign_pointer(net_device_ctx
->vf_netdev
, vf_netdev
);
1372 static int netvsc_vf_up(struct net_device
*vf_netdev
)
1374 struct net_device
*ndev
;
1375 struct netvsc_device
*netvsc_dev
;
1376 struct net_device_context
*net_device_ctx
;
1378 ndev
= get_netvsc_byref(vf_netdev
);
1382 net_device_ctx
= netdev_priv(ndev
);
1383 netvsc_dev
= net_device_ctx
->nvdev
;
1385 netdev_info(ndev
, "VF up: %s\n", vf_netdev
->name
);
1388 * Open the device before switching data path.
1390 rndis_filter_open(netvsc_dev
);
1393 * notify the host to switch the data path.
1395 netvsc_switch_datapath(ndev
, true);
1396 netdev_info(ndev
, "Data path switched to VF: %s\n", vf_netdev
->name
);
1398 netif_carrier_off(ndev
);
1400 /* Now notify peers through VF device. */
1401 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS
, vf_netdev
);
1406 static int netvsc_vf_down(struct net_device
*vf_netdev
)
1408 struct net_device
*ndev
;
1409 struct netvsc_device
*netvsc_dev
;
1410 struct net_device_context
*net_device_ctx
;
1412 ndev
= get_netvsc_byref(vf_netdev
);
1416 net_device_ctx
= netdev_priv(ndev
);
1417 netvsc_dev
= net_device_ctx
->nvdev
;
1419 netdev_info(ndev
, "VF down: %s\n", vf_netdev
->name
);
1420 netvsc_switch_datapath(ndev
, false);
1421 netdev_info(ndev
, "Data path switched from VF: %s\n", vf_netdev
->name
);
1422 rndis_filter_close(netvsc_dev
);
1423 netif_carrier_on(ndev
);
1425 /* Now notify peers through netvsc device. */
1426 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS
, ndev
);
1431 static int netvsc_unregister_vf(struct net_device
*vf_netdev
)
1433 struct net_device
*ndev
;
1434 struct net_device_context
*net_device_ctx
;
1436 ndev
= get_netvsc_byref(vf_netdev
);
1440 net_device_ctx
= netdev_priv(ndev
);
1442 netdev_info(ndev
, "VF unregistering: %s\n", vf_netdev
->name
);
1444 RCU_INIT_POINTER(net_device_ctx
->vf_netdev
, NULL
);
1446 module_put(THIS_MODULE
);
1450 static int netvsc_probe(struct hv_device
*dev
,
1451 const struct hv_vmbus_device_id
*dev_id
)
1453 struct net_device
*net
= NULL
;
1454 struct net_device_context
*net_device_ctx
;
1455 struct netvsc_device_info device_info
;
1456 struct netvsc_device
*nvdev
;
1459 net
= alloc_etherdev_mq(sizeof(struct net_device_context
),
1464 netif_carrier_off(net
);
1466 netvsc_init_settings(net
);
1468 net_device_ctx
= netdev_priv(net
);
1469 net_device_ctx
->device_ctx
= dev
;
1470 net_device_ctx
->msg_enable
= netif_msg_init(debug
, default_msg
);
1471 if (netif_msg_probe(net_device_ctx
))
1472 netdev_dbg(net
, "netvsc msg_enable: %d\n",
1473 net_device_ctx
->msg_enable
);
1475 hv_set_drvdata(dev
, net
);
1477 net_device_ctx
->start_remove
= false;
1479 INIT_DELAYED_WORK(&net_device_ctx
->dwork
, netvsc_link_change
);
1480 INIT_WORK(&net_device_ctx
->work
, do_set_multicast
);
1482 spin_lock_init(&net_device_ctx
->lock
);
1483 INIT_LIST_HEAD(&net_device_ctx
->reconfig_events
);
1485 net
->netdev_ops
= &device_ops
;
1486 net
->ethtool_ops
= ðtool_ops
;
1487 SET_NETDEV_DEV(net
, &dev
->device
);
1489 /* We always need headroom for rndis header */
1490 net
->needed_headroom
= RNDIS_AND_PPI_SIZE
;
1492 /* Notify the netvsc driver of the new device */
1493 memset(&device_info
, 0, sizeof(device_info
));
1494 device_info
.ring_size
= ring_size
;
1495 device_info
.max_num_vrss_chns
= min_t(u32
, VRSS_CHANNEL_DEFAULT
,
1497 ret
= rndis_filter_device_add(dev
, &device_info
);
1499 netdev_err(net
, "unable to add netvsc device (ret %d)\n", ret
);
1501 hv_set_drvdata(dev
, NULL
);
1504 memcpy(net
->dev_addr
, device_info
.mac_adr
, ETH_ALEN
);
1506 /* hw_features computed in rndis_filter_device_add */
1507 net
->features
= net
->hw_features
|
1508 NETIF_F_HIGHDMA
| NETIF_F_SG
|
1509 NETIF_F_HW_VLAN_CTAG_TX
| NETIF_F_HW_VLAN_CTAG_RX
;
1510 net
->vlan_features
= net
->features
;
1512 nvdev
= net_device_ctx
->nvdev
;
1513 netif_set_real_num_tx_queues(net
, nvdev
->num_chn
);
1514 netif_set_real_num_rx_queues(net
, nvdev
->num_chn
);
1516 /* MTU range: 68 - 1500 or 65521 */
1517 net
->min_mtu
= NETVSC_MTU_MIN
;
1518 if (nvdev
->nvsp_version
>= NVSP_PROTOCOL_VERSION_2
)
1519 net
->max_mtu
= NETVSC_MTU
- ETH_HLEN
;
1521 net
->max_mtu
= ETH_DATA_LEN
;
1523 ret
= register_netdev(net
);
1525 pr_err("Unable to register netdev.\n");
1526 rndis_filter_device_remove(dev
, nvdev
);
1533 static int netvsc_remove(struct hv_device
*dev
)
1535 struct net_device
*net
;
1536 struct net_device_context
*ndev_ctx
;
1538 net
= hv_get_drvdata(dev
);
1541 dev_err(&dev
->device
, "No net device to remove\n");
1545 ndev_ctx
= netdev_priv(net
);
1547 /* Avoid racing with netvsc_change_mtu()/netvsc_set_channels()
1548 * removing the device.
1551 ndev_ctx
->start_remove
= true;
1554 cancel_delayed_work_sync(&ndev_ctx
->dwork
);
1555 cancel_work_sync(&ndev_ctx
->work
);
1557 /* Stop outbound asap */
1558 netif_tx_disable(net
);
1560 unregister_netdev(net
);
1563 * Call to the vsc driver to let it know that the device is being
1566 rndis_filter_device_remove(dev
, ndev_ctx
->nvdev
);
1568 hv_set_drvdata(dev
, NULL
);
1574 static const struct hv_vmbus_device_id id_table
[] = {
1580 MODULE_DEVICE_TABLE(vmbus
, id_table
);
1582 /* The one and only one */
1583 static struct hv_driver netvsc_drv
= {
1584 .name
= KBUILD_MODNAME
,
1585 .id_table
= id_table
,
1586 .probe
= netvsc_probe
,
1587 .remove
= netvsc_remove
,
1591 * On Hyper-V, every VF interface is matched with a corresponding
1592 * synthetic interface. The synthetic interface is presented first
1593 * to the guest. When the corresponding VF instance is registered,
1594 * we will take care of switching the data path.
1596 static int netvsc_netdev_event(struct notifier_block
*this,
1597 unsigned long event
, void *ptr
)
1599 struct net_device
*event_dev
= netdev_notifier_info_to_dev(ptr
);
1601 /* Skip our own events */
1602 if (event_dev
->netdev_ops
== &device_ops
)
1605 /* Avoid non-Ethernet type devices */
1606 if (event_dev
->type
!= ARPHRD_ETHER
)
1609 /* Avoid Vlan dev with same MAC registering as VF */
1610 if (is_vlan_dev(event_dev
))
1613 /* Avoid Bonding master dev with same MAC registering as VF */
1614 if ((event_dev
->priv_flags
& IFF_BONDING
) &&
1615 (event_dev
->flags
& IFF_MASTER
))
1619 case NETDEV_REGISTER
:
1620 return netvsc_register_vf(event_dev
);
1621 case NETDEV_UNREGISTER
:
1622 return netvsc_unregister_vf(event_dev
);
1624 return netvsc_vf_up(event_dev
);
1626 return netvsc_vf_down(event_dev
);
1632 static struct notifier_block netvsc_netdev_notifier
= {
1633 .notifier_call
= netvsc_netdev_event
,
1636 static void __exit
netvsc_drv_exit(void)
1638 unregister_netdevice_notifier(&netvsc_netdev_notifier
);
1639 vmbus_driver_unregister(&netvsc_drv
);
1642 static int __init
netvsc_drv_init(void)
1646 if (ring_size
< RING_SIZE_MIN
) {
1647 ring_size
= RING_SIZE_MIN
;
1648 pr_info("Increased ring_size to %d (min allowed)\n",
1651 ret
= vmbus_driver_register(&netvsc_drv
);
1656 register_netdevice_notifier(&netvsc_netdev_notifier
);
1660 MODULE_LICENSE("GPL");
1661 MODULE_DESCRIPTION("Microsoft Hyper-V network driver");
1663 module_init(netvsc_drv_init
);
1664 module_exit(netvsc_drv_exit
);