/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/vmalloc.h>
#include <asm/sync_bitops.h>

#include "hyperv_net.h"
37 * Switch the data path from the synthetic interface to the VF
40 void netvsc_switch_datapath(struct net_device
*ndev
, bool vf
)
42 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
43 struct hv_device
*dev
= net_device_ctx
->device_ctx
;
44 struct netvsc_device
*nv_dev
= net_device_ctx
->nvdev
;
45 struct nvsp_message
*init_pkt
= &nv_dev
->channel_init_pkt
;
47 memset(init_pkt
, 0, sizeof(struct nvsp_message
));
48 init_pkt
->hdr
.msg_type
= NVSP_MSG4_TYPE_SWITCH_DATA_PATH
;
50 init_pkt
->msg
.v4_msg
.active_dp
.active_datapath
=
53 init_pkt
->msg
.v4_msg
.active_dp
.active_datapath
=
54 NVSP_DATAPATH_SYNTHETIC
;
56 vmbus_sendpacket(dev
->channel
, init_pkt
,
57 sizeof(struct nvsp_message
),
58 (unsigned long)init_pkt
,
59 VM_PKT_DATA_INBAND
, 0);
61 net_device_ctx
->datapath
= vf
;
64 static struct netvsc_device
*alloc_net_device(void)
66 struct netvsc_device
*net_device
;
68 net_device
= kzalloc(sizeof(struct netvsc_device
), GFP_KERNEL
);
72 net_device
->chan_table
[0].mrc
.buf
73 = vzalloc(NETVSC_RECVSLOT_MAX
* sizeof(struct recv_comp_data
));
75 init_waitqueue_head(&net_device
->wait_drain
);
76 net_device
->destroy
= false;
77 atomic_set(&net_device
->open_cnt
, 0);
78 net_device
->max_pkt
= RNDIS_MAX_PKT_DEFAULT
;
79 net_device
->pkt_align
= RNDIS_PKT_ALIGN_DEFAULT
;
80 init_completion(&net_device
->channel_init_wait
);
81 init_waitqueue_head(&net_device
->subchan_open
);
86 static void free_netvsc_device(struct rcu_head
*head
)
88 struct netvsc_device
*nvdev
89 = container_of(head
, struct netvsc_device
, rcu
);
92 for (i
= 0; i
< VRSS_CHANNEL_MAX
; i
++)
93 vfree(nvdev
->chan_table
[i
].mrc
.buf
);
98 static void free_netvsc_device_rcu(struct netvsc_device
*nvdev
)
100 call_rcu(&nvdev
->rcu
, free_netvsc_device
);
103 static void netvsc_destroy_buf(struct hv_device
*device
)
105 struct nvsp_message
*revoke_packet
;
106 struct net_device
*ndev
= hv_get_drvdata(device
);
107 struct netvsc_device
*net_device
= net_device_to_netvsc_device(ndev
);
111 * If we got a section count, it means we received a
112 * SendReceiveBufferComplete msg (ie sent
113 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
114 * to send a revoke msg here
116 if (net_device
->recv_section_cnt
) {
117 /* Send the revoke receive buffer */
118 revoke_packet
= &net_device
->revoke_packet
;
119 memset(revoke_packet
, 0, sizeof(struct nvsp_message
));
121 revoke_packet
->hdr
.msg_type
=
122 NVSP_MSG1_TYPE_REVOKE_RECV_BUF
;
123 revoke_packet
->msg
.v1_msg
.
124 revoke_recv_buf
.id
= NETVSC_RECEIVE_BUFFER_ID
;
126 ret
= vmbus_sendpacket(device
->channel
,
128 sizeof(struct nvsp_message
),
129 (unsigned long)revoke_packet
,
130 VM_PKT_DATA_INBAND
, 0);
131 /* If the failure is because the channel is rescinded;
132 * ignore the failure since we cannot send on a rescinded
133 * channel. This would allow us to properly cleanup
134 * even when the channel is rescinded.
136 if (device
->channel
->rescind
)
139 * If we failed here, we might as well return and
140 * have a leak rather than continue and a bugchk
143 netdev_err(ndev
, "unable to send "
144 "revoke receive buffer to netvsp\n");
149 /* Teardown the gpadl on the vsp end */
150 if (net_device
->recv_buf_gpadl_handle
) {
151 ret
= vmbus_teardown_gpadl(device
->channel
,
152 net_device
->recv_buf_gpadl_handle
);
154 /* If we failed here, we might as well return and have a leak
155 * rather than continue and a bugchk
159 "unable to teardown receive buffer's gpadl\n");
162 net_device
->recv_buf_gpadl_handle
= 0;
165 if (net_device
->recv_buf
) {
166 /* Free up the receive buffer */
167 vfree(net_device
->recv_buf
);
168 net_device
->recv_buf
= NULL
;
171 if (net_device
->recv_section
) {
172 net_device
->recv_section_cnt
= 0;
173 kfree(net_device
->recv_section
);
174 net_device
->recv_section
= NULL
;
177 /* Deal with the send buffer we may have setup.
178 * If we got a send section size, it means we received a
179 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
180 * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
181 * to send a revoke msg here
183 if (net_device
->send_section_size
) {
184 /* Send the revoke receive buffer */
185 revoke_packet
= &net_device
->revoke_packet
;
186 memset(revoke_packet
, 0, sizeof(struct nvsp_message
));
188 revoke_packet
->hdr
.msg_type
=
189 NVSP_MSG1_TYPE_REVOKE_SEND_BUF
;
190 revoke_packet
->msg
.v1_msg
.revoke_send_buf
.id
=
191 NETVSC_SEND_BUFFER_ID
;
193 ret
= vmbus_sendpacket(device
->channel
,
195 sizeof(struct nvsp_message
),
196 (unsigned long)revoke_packet
,
197 VM_PKT_DATA_INBAND
, 0);
199 /* If the failure is because the channel is rescinded;
200 * ignore the failure since we cannot send on a rescinded
201 * channel. This would allow us to properly cleanup
202 * even when the channel is rescinded.
204 if (device
->channel
->rescind
)
207 /* If we failed here, we might as well return and
208 * have a leak rather than continue and a bugchk
211 netdev_err(ndev
, "unable to send "
212 "revoke send buffer to netvsp\n");
216 /* Teardown the gpadl on the vsp end */
217 if (net_device
->send_buf_gpadl_handle
) {
218 ret
= vmbus_teardown_gpadl(device
->channel
,
219 net_device
->send_buf_gpadl_handle
);
221 /* If we failed here, we might as well return and have a leak
222 * rather than continue and a bugchk
226 "unable to teardown send buffer's gpadl\n");
229 net_device
->send_buf_gpadl_handle
= 0;
231 if (net_device
->send_buf
) {
232 /* Free up the send buffer */
233 vfree(net_device
->send_buf
);
234 net_device
->send_buf
= NULL
;
236 kfree(net_device
->send_section_map
);
239 static int netvsc_init_buf(struct hv_device
*device
,
240 struct netvsc_device
*net_device
)
243 struct nvsp_message
*init_packet
;
244 struct net_device
*ndev
;
248 ndev
= hv_get_drvdata(device
);
250 node
= cpu_to_node(device
->channel
->target_cpu
);
251 net_device
->recv_buf
= vzalloc_node(net_device
->recv_buf_size
, node
);
252 if (!net_device
->recv_buf
)
253 net_device
->recv_buf
= vzalloc(net_device
->recv_buf_size
);
255 if (!net_device
->recv_buf
) {
256 netdev_err(ndev
, "unable to allocate receive "
257 "buffer of size %d\n", net_device
->recv_buf_size
);
263 * Establish the gpadl handle for this buffer on this
264 * channel. Note: This call uses the vmbus connection rather
265 * than the channel to establish the gpadl handle.
267 ret
= vmbus_establish_gpadl(device
->channel
, net_device
->recv_buf
,
268 net_device
->recv_buf_size
,
269 &net_device
->recv_buf_gpadl_handle
);
272 "unable to establish receive buffer's gpadl\n");
276 /* Notify the NetVsp of the gpadl handle */
277 init_packet
= &net_device
->channel_init_pkt
;
278 memset(init_packet
, 0, sizeof(struct nvsp_message
));
279 init_packet
->hdr
.msg_type
= NVSP_MSG1_TYPE_SEND_RECV_BUF
;
280 init_packet
->msg
.v1_msg
.send_recv_buf
.
281 gpadl_handle
= net_device
->recv_buf_gpadl_handle
;
282 init_packet
->msg
.v1_msg
.
283 send_recv_buf
.id
= NETVSC_RECEIVE_BUFFER_ID
;
285 /* Send the gpadl notification request */
286 ret
= vmbus_sendpacket(device
->channel
, init_packet
,
287 sizeof(struct nvsp_message
),
288 (unsigned long)init_packet
,
290 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
);
293 "unable to send receive buffer's gpadl to netvsp\n");
297 wait_for_completion(&net_device
->channel_init_wait
);
299 /* Check the response */
300 if (init_packet
->msg
.v1_msg
.
301 send_recv_buf_complete
.status
!= NVSP_STAT_SUCCESS
) {
302 netdev_err(ndev
, "Unable to complete receive buffer "
303 "initialization with NetVsp - status %d\n",
304 init_packet
->msg
.v1_msg
.
305 send_recv_buf_complete
.status
);
310 /* Parse the response */
312 net_device
->recv_section_cnt
= init_packet
->msg
.
313 v1_msg
.send_recv_buf_complete
.num_sections
;
315 net_device
->recv_section
= kmemdup(
316 init_packet
->msg
.v1_msg
.send_recv_buf_complete
.sections
,
317 net_device
->recv_section_cnt
*
318 sizeof(struct nvsp_1_receive_buffer_section
),
320 if (net_device
->recv_section
== NULL
) {
326 * For 1st release, there should only be 1 section that represents the
327 * entire receive buffer
329 if (net_device
->recv_section_cnt
!= 1 ||
330 net_device
->recv_section
->offset
!= 0) {
335 /* Now setup the send buffer.
337 net_device
->send_buf
= vzalloc_node(net_device
->send_buf_size
, node
);
338 if (!net_device
->send_buf
)
339 net_device
->send_buf
= vzalloc(net_device
->send_buf_size
);
340 if (!net_device
->send_buf
) {
341 netdev_err(ndev
, "unable to allocate send "
342 "buffer of size %d\n", net_device
->send_buf_size
);
347 /* Establish the gpadl handle for this buffer on this
348 * channel. Note: This call uses the vmbus connection rather
349 * than the channel to establish the gpadl handle.
351 ret
= vmbus_establish_gpadl(device
->channel
, net_device
->send_buf
,
352 net_device
->send_buf_size
,
353 &net_device
->send_buf_gpadl_handle
);
356 "unable to establish send buffer's gpadl\n");
360 /* Notify the NetVsp of the gpadl handle */
361 init_packet
= &net_device
->channel_init_pkt
;
362 memset(init_packet
, 0, sizeof(struct nvsp_message
));
363 init_packet
->hdr
.msg_type
= NVSP_MSG1_TYPE_SEND_SEND_BUF
;
364 init_packet
->msg
.v1_msg
.send_send_buf
.gpadl_handle
=
365 net_device
->send_buf_gpadl_handle
;
366 init_packet
->msg
.v1_msg
.send_send_buf
.id
= NETVSC_SEND_BUFFER_ID
;
368 /* Send the gpadl notification request */
369 ret
= vmbus_sendpacket(device
->channel
, init_packet
,
370 sizeof(struct nvsp_message
),
371 (unsigned long)init_packet
,
373 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
);
376 "unable to send send buffer's gpadl to netvsp\n");
380 wait_for_completion(&net_device
->channel_init_wait
);
382 /* Check the response */
383 if (init_packet
->msg
.v1_msg
.
384 send_send_buf_complete
.status
!= NVSP_STAT_SUCCESS
) {
385 netdev_err(ndev
, "Unable to complete send buffer "
386 "initialization with NetVsp - status %d\n",
387 init_packet
->msg
.v1_msg
.
388 send_send_buf_complete
.status
);
393 /* Parse the response */
394 net_device
->send_section_size
= init_packet
->msg
.
395 v1_msg
.send_send_buf_complete
.section_size
;
397 /* Section count is simply the size divided by the section size.
399 net_device
->send_section_cnt
=
400 net_device
->send_buf_size
/ net_device
->send_section_size
;
402 netdev_dbg(ndev
, "Send section size: %d, Section count:%d\n",
403 net_device
->send_section_size
, net_device
->send_section_cnt
);
405 /* Setup state for managing the send buffer. */
406 map_words
= DIV_ROUND_UP(net_device
->send_section_cnt
, BITS_PER_LONG
);
408 net_device
->send_section_map
= kcalloc(map_words
, sizeof(ulong
), GFP_KERNEL
);
409 if (net_device
->send_section_map
== NULL
) {
417 netvsc_destroy_buf(device
);
423 /* Negotiate NVSP protocol version */
424 static int negotiate_nvsp_ver(struct hv_device
*device
,
425 struct netvsc_device
*net_device
,
426 struct nvsp_message
*init_packet
,
429 struct net_device
*ndev
= hv_get_drvdata(device
);
432 memset(init_packet
, 0, sizeof(struct nvsp_message
));
433 init_packet
->hdr
.msg_type
= NVSP_MSG_TYPE_INIT
;
434 init_packet
->msg
.init_msg
.init
.min_protocol_ver
= nvsp_ver
;
435 init_packet
->msg
.init_msg
.init
.max_protocol_ver
= nvsp_ver
;
437 /* Send the init request */
438 ret
= vmbus_sendpacket(device
->channel
, init_packet
,
439 sizeof(struct nvsp_message
),
440 (unsigned long)init_packet
,
442 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
);
447 wait_for_completion(&net_device
->channel_init_wait
);
449 if (init_packet
->msg
.init_msg
.init_complete
.status
!=
453 if (nvsp_ver
== NVSP_PROTOCOL_VERSION_1
)
456 /* NVSPv2 or later: Send NDIS config */
457 memset(init_packet
, 0, sizeof(struct nvsp_message
));
458 init_packet
->hdr
.msg_type
= NVSP_MSG2_TYPE_SEND_NDIS_CONFIG
;
459 init_packet
->msg
.v2_msg
.send_ndis_config
.mtu
= ndev
->mtu
+ ETH_HLEN
;
460 init_packet
->msg
.v2_msg
.send_ndis_config
.capability
.ieee8021q
= 1;
462 if (nvsp_ver
>= NVSP_PROTOCOL_VERSION_5
) {
463 init_packet
->msg
.v2_msg
.send_ndis_config
.capability
.sriov
= 1;
465 /* Teaming bit is needed to receive link speed updates */
466 init_packet
->msg
.v2_msg
.send_ndis_config
.capability
.teaming
= 1;
469 ret
= vmbus_sendpacket(device
->channel
, init_packet
,
470 sizeof(struct nvsp_message
),
471 (unsigned long)init_packet
,
472 VM_PKT_DATA_INBAND
, 0);
477 static int netvsc_connect_vsp(struct hv_device
*device
,
478 struct netvsc_device
*net_device
)
480 const u32 ver_list
[] = {
481 NVSP_PROTOCOL_VERSION_1
, NVSP_PROTOCOL_VERSION_2
,
482 NVSP_PROTOCOL_VERSION_4
, NVSP_PROTOCOL_VERSION_5
484 struct nvsp_message
*init_packet
;
485 int ndis_version
, i
, ret
;
487 init_packet
= &net_device
->channel_init_pkt
;
489 /* Negotiate the latest NVSP protocol supported */
490 for (i
= ARRAY_SIZE(ver_list
) - 1; i
>= 0; i
--)
491 if (negotiate_nvsp_ver(device
, net_device
, init_packet
,
493 net_device
->nvsp_version
= ver_list
[i
];
502 pr_debug("Negotiated NVSP version:%x\n", net_device
->nvsp_version
);
504 /* Send the ndis version */
505 memset(init_packet
, 0, sizeof(struct nvsp_message
));
507 if (net_device
->nvsp_version
<= NVSP_PROTOCOL_VERSION_4
)
508 ndis_version
= 0x00060001;
510 ndis_version
= 0x0006001e;
512 init_packet
->hdr
.msg_type
= NVSP_MSG1_TYPE_SEND_NDIS_VER
;
513 init_packet
->msg
.v1_msg
.
514 send_ndis_ver
.ndis_major_ver
=
515 (ndis_version
& 0xFFFF0000) >> 16;
516 init_packet
->msg
.v1_msg
.
517 send_ndis_ver
.ndis_minor_ver
=
518 ndis_version
& 0xFFFF;
520 /* Send the init request */
521 ret
= vmbus_sendpacket(device
->channel
, init_packet
,
522 sizeof(struct nvsp_message
),
523 (unsigned long)init_packet
,
524 VM_PKT_DATA_INBAND
, 0);
528 /* Post the big receive buffer to NetVSP */
529 if (net_device
->nvsp_version
<= NVSP_PROTOCOL_VERSION_2
)
530 net_device
->recv_buf_size
= NETVSC_RECEIVE_BUFFER_SIZE_LEGACY
;
532 net_device
->recv_buf_size
= NETVSC_RECEIVE_BUFFER_SIZE
;
533 net_device
->send_buf_size
= NETVSC_SEND_BUFFER_SIZE
;
535 ret
= netvsc_init_buf(device
, net_device
);
/* Tear down the host connection; this only releases the shared buffers. */
static void netvsc_disconnect_vsp(struct hv_device *device)
{
	netvsc_destroy_buf(device);
}
547 * netvsc_device_remove - Callback when the root bus device is removed
549 void netvsc_device_remove(struct hv_device
*device
)
551 struct net_device
*ndev
= hv_get_drvdata(device
);
552 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
553 struct netvsc_device
*net_device
= net_device_ctx
->nvdev
;
556 netvsc_disconnect_vsp(device
);
558 RCU_INIT_POINTER(net_device_ctx
->nvdev
, NULL
);
561 * At this point, no one should be accessing net_device
564 netdev_dbg(ndev
, "net device safe to remove\n");
566 /* Now, we can close the channel safely */
567 vmbus_close(device
->channel
);
569 /* And dissassociate NAPI context from device */
570 for (i
= 0; i
< net_device
->num_chn
; i
++)
571 netif_napi_del(&net_device
->chan_table
[i
].napi
);
573 /* Release all resources */
574 free_netvsc_device_rcu(net_device
);
577 #define RING_AVAIL_PERCENT_HIWATER 20
578 #define RING_AVAIL_PERCENT_LOWATER 10
581 * Get the percentage of available bytes to write in the ring.
582 * The return value is in range from 0 to 100.
584 static inline u32
hv_ringbuf_avail_percent(
585 struct hv_ring_buffer_info
*ring_info
)
587 u32 avail_read
, avail_write
;
589 hv_get_ringbuffer_availbytes(ring_info
, &avail_read
, &avail_write
);
591 return avail_write
* 100 / ring_info
->ring_datasize
;
594 static inline void netvsc_free_send_slot(struct netvsc_device
*net_device
,
597 sync_change_bit(index
, net_device
->send_section_map
);
600 static void netvsc_send_tx_complete(struct netvsc_device
*net_device
,
601 struct vmbus_channel
*incoming_channel
,
602 struct hv_device
*device
,
603 const struct vmpacket_descriptor
*desc
,
606 struct sk_buff
*skb
= (struct sk_buff
*)(unsigned long)desc
->trans_id
;
607 struct net_device
*ndev
= hv_get_drvdata(device
);
608 struct vmbus_channel
*channel
= device
->channel
;
612 /* Notify the layer above us */
614 const struct hv_netvsc_packet
*packet
615 = (struct hv_netvsc_packet
*)skb
->cb
;
616 u32 send_index
= packet
->send_buf_index
;
617 struct netvsc_stats
*tx_stats
;
619 if (send_index
!= NETVSC_INVALID_INDEX
)
620 netvsc_free_send_slot(net_device
, send_index
);
621 q_idx
= packet
->q_idx
;
622 channel
= incoming_channel
;
624 tx_stats
= &net_device
->chan_table
[q_idx
].tx_stats
;
626 u64_stats_update_begin(&tx_stats
->syncp
);
627 tx_stats
->packets
+= packet
->total_packets
;
628 tx_stats
->bytes
+= packet
->total_bytes
;
629 u64_stats_update_end(&tx_stats
->syncp
);
631 napi_consume_skb(skb
, budget
);
635 atomic_dec_return(&net_device
->chan_table
[q_idx
].queue_sends
);
637 if (net_device
->destroy
&& queue_sends
== 0)
638 wake_up(&net_device
->wait_drain
);
640 if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev
, q_idx
)) &&
641 (hv_ringbuf_avail_percent(&channel
->outbound
) > RING_AVAIL_PERCENT_HIWATER
||
643 netif_tx_wake_queue(netdev_get_tx_queue(ndev
, q_idx
));
646 static void netvsc_send_completion(struct netvsc_device
*net_device
,
647 struct vmbus_channel
*incoming_channel
,
648 struct hv_device
*device
,
649 const struct vmpacket_descriptor
*desc
,
652 struct nvsp_message
*nvsp_packet
= hv_pkt_data(desc
);
653 struct net_device
*ndev
= hv_get_drvdata(device
);
655 switch (nvsp_packet
->hdr
.msg_type
) {
656 case NVSP_MSG_TYPE_INIT_COMPLETE
:
657 case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE
:
658 case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE
:
659 case NVSP_MSG5_TYPE_SUBCHANNEL
:
660 /* Copy the response back */
661 memcpy(&net_device
->channel_init_pkt
, nvsp_packet
,
662 sizeof(struct nvsp_message
));
663 complete(&net_device
->channel_init_wait
);
666 case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE
:
667 netvsc_send_tx_complete(net_device
, incoming_channel
,
668 device
, desc
, budget
);
673 "Unknown send completion type %d received!!\n",
674 nvsp_packet
->hdr
.msg_type
);
678 static u32
netvsc_get_next_send_section(struct netvsc_device
*net_device
)
680 unsigned long *map_addr
= net_device
->send_section_map
;
683 for_each_clear_bit(i
, map_addr
, net_device
->send_section_cnt
) {
684 if (sync_test_and_set_bit(i
, map_addr
) == 0)
688 return NETVSC_INVALID_INDEX
;
691 static u32
netvsc_copy_to_send_buf(struct netvsc_device
*net_device
,
692 unsigned int section_index
,
694 struct hv_netvsc_packet
*packet
,
695 struct rndis_message
*rndis_msg
,
696 struct hv_page_buffer
**pb
,
699 char *start
= net_device
->send_buf
;
700 char *dest
= start
+ (section_index
* net_device
->send_section_size
)
705 u32 remain
= packet
->total_data_buflen
% net_device
->pkt_align
;
706 u32 page_count
= packet
->cp_partial
? packet
->rmsg_pgcnt
:
707 packet
->page_buf_cnt
;
710 if (skb
->xmit_more
&& remain
&& !packet
->cp_partial
) {
711 padding
= net_device
->pkt_align
- remain
;
712 rndis_msg
->msg_len
+= padding
;
713 packet
->total_data_buflen
+= padding
;
716 for (i
= 0; i
< page_count
; i
++) {
717 char *src
= phys_to_virt((*pb
)[i
].pfn
<< PAGE_SHIFT
);
718 u32 offset
= (*pb
)[i
].offset
;
719 u32 len
= (*pb
)[i
].len
;
721 memcpy(dest
, (src
+ offset
), len
);
727 memset(dest
, 0, padding
);
734 static inline int netvsc_send_pkt(
735 struct hv_device
*device
,
736 struct hv_netvsc_packet
*packet
,
737 struct netvsc_device
*net_device
,
738 struct hv_page_buffer
**pb
,
741 struct nvsp_message nvmsg
;
742 struct netvsc_channel
*nvchan
743 = &net_device
->chan_table
[packet
->q_idx
];
744 struct vmbus_channel
*out_channel
= nvchan
->channel
;
745 struct net_device
*ndev
= hv_get_drvdata(device
);
746 struct netdev_queue
*txq
= netdev_get_tx_queue(ndev
, packet
->q_idx
);
749 struct hv_page_buffer
*pgbuf
;
750 u32 ring_avail
= hv_ringbuf_avail_percent(&out_channel
->outbound
);
752 nvmsg
.hdr
.msg_type
= NVSP_MSG1_TYPE_SEND_RNDIS_PKT
;
755 nvmsg
.msg
.v1_msg
.send_rndis_pkt
.channel_type
= 0;
757 /* 1 is RMC_CONTROL; */
758 nvmsg
.msg
.v1_msg
.send_rndis_pkt
.channel_type
= 1;
761 nvmsg
.msg
.v1_msg
.send_rndis_pkt
.send_buf_section_index
=
762 packet
->send_buf_index
;
763 if (packet
->send_buf_index
== NETVSC_INVALID_INDEX
)
764 nvmsg
.msg
.v1_msg
.send_rndis_pkt
.send_buf_section_size
= 0;
766 nvmsg
.msg
.v1_msg
.send_rndis_pkt
.send_buf_section_size
=
767 packet
->total_data_buflen
;
771 if (out_channel
->rescind
)
774 if (packet
->page_buf_cnt
) {
775 pgbuf
= packet
->cp_partial
? (*pb
) +
776 packet
->rmsg_pgcnt
: (*pb
);
777 ret
= vmbus_sendpacket_pagebuffer_ctl(out_channel
,
779 packet
->page_buf_cnt
,
781 sizeof(struct nvsp_message
),
783 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
);
785 ret
= vmbus_sendpacket_ctl(out_channel
, &nvmsg
,
786 sizeof(struct nvsp_message
),
789 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
);
793 atomic_inc_return(&nvchan
->queue_sends
);
795 if (ring_avail
< RING_AVAIL_PERCENT_LOWATER
)
796 netif_tx_stop_queue(txq
);
797 } else if (ret
== -EAGAIN
) {
798 netif_tx_stop_queue(txq
);
799 if (atomic_read(&nvchan
->queue_sends
) < 1) {
800 netif_tx_wake_queue(txq
);
804 netdev_err(ndev
, "Unable to send packet %p ret %d\n",
811 /* Move packet out of multi send data (msd), and clear msd */
812 static inline void move_pkt_msd(struct hv_netvsc_packet
**msd_send
,
813 struct sk_buff
**msd_skb
,
814 struct multi_send_data
*msdp
)
816 *msd_skb
= msdp
->skb
;
817 *msd_send
= msdp
->pkt
;
823 int netvsc_send(struct hv_device
*device
,
824 struct hv_netvsc_packet
*packet
,
825 struct rndis_message
*rndis_msg
,
826 struct hv_page_buffer
**pb
,
829 struct netvsc_device
*net_device
= hv_device_to_netvsc_device(device
);
831 struct netvsc_channel
*nvchan
;
832 u32 pktlen
= packet
->total_data_buflen
, msd_len
= 0;
833 unsigned int section_index
= NETVSC_INVALID_INDEX
;
834 struct multi_send_data
*msdp
;
835 struct hv_netvsc_packet
*msd_send
= NULL
, *cur_send
= NULL
;
836 struct sk_buff
*msd_skb
= NULL
;
838 bool xmit_more
= (skb
!= NULL
) ? skb
->xmit_more
: false;
840 /* If device is rescinded, return error and packet will get dropped. */
841 if (unlikely(net_device
->destroy
))
844 /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
845 * here before the negotiation with the host is finished and
846 * send_section_map may not be allocated yet.
848 if (unlikely(!net_device
->send_section_map
))
851 nvchan
= &net_device
->chan_table
[packet
->q_idx
];
852 packet
->send_buf_index
= NETVSC_INVALID_INDEX
;
853 packet
->cp_partial
= false;
855 /* Send control message directly without accessing msd (Multi-Send
856 * Data) field which may be changed during data packet processing.
863 /* batch packets in send buffer if possible */
866 msd_len
= msdp
->pkt
->total_data_buflen
;
868 try_batch
= msd_len
> 0 && msdp
->count
< net_device
->max_pkt
;
869 if (try_batch
&& msd_len
+ pktlen
+ net_device
->pkt_align
<
870 net_device
->send_section_size
) {
871 section_index
= msdp
->pkt
->send_buf_index
;
873 } else if (try_batch
&& msd_len
+ packet
->rmsg_size
<
874 net_device
->send_section_size
) {
875 section_index
= msdp
->pkt
->send_buf_index
;
876 packet
->cp_partial
= true;
878 } else if (pktlen
+ net_device
->pkt_align
<
879 net_device
->send_section_size
) {
880 section_index
= netvsc_get_next_send_section(net_device
);
881 if (section_index
!= NETVSC_INVALID_INDEX
) {
882 move_pkt_msd(&msd_send
, &msd_skb
, msdp
);
887 if (section_index
!= NETVSC_INVALID_INDEX
) {
888 netvsc_copy_to_send_buf(net_device
,
889 section_index
, msd_len
,
890 packet
, rndis_msg
, pb
, skb
);
892 packet
->send_buf_index
= section_index
;
894 if (packet
->cp_partial
) {
895 packet
->page_buf_cnt
-= packet
->rmsg_pgcnt
;
896 packet
->total_data_buflen
= msd_len
+ packet
->rmsg_size
;
898 packet
->page_buf_cnt
= 0;
899 packet
->total_data_buflen
+= msd_len
;
903 packet
->total_packets
+= msdp
->pkt
->total_packets
;
904 packet
->total_bytes
+= msdp
->pkt
->total_bytes
;
908 dev_consume_skb_any(msdp
->skb
);
910 if (xmit_more
&& !packet
->cp_partial
) {
921 move_pkt_msd(&msd_send
, &msd_skb
, msdp
);
926 int m_ret
= netvsc_send_pkt(device
, msd_send
, net_device
,
930 netvsc_free_send_slot(net_device
,
931 msd_send
->send_buf_index
);
932 dev_kfree_skb_any(msd_skb
);
938 ret
= netvsc_send_pkt(device
, cur_send
, net_device
, pb
, skb
);
940 if (ret
!= 0 && section_index
!= NETVSC_INVALID_INDEX
)
941 netvsc_free_send_slot(net_device
, section_index
);
946 static int netvsc_send_recv_completion(struct vmbus_channel
*channel
,
947 u64 transaction_id
, u32 status
)
949 struct nvsp_message recvcompMessage
;
952 recvcompMessage
.hdr
.msg_type
=
953 NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE
;
955 recvcompMessage
.msg
.v1_msg
.send_rndis_pkt_complete
.status
= status
;
957 /* Send the completion */
958 ret
= vmbus_sendpacket(channel
, &recvcompMessage
,
959 sizeof(struct nvsp_message_header
) + sizeof(u32
),
960 transaction_id
, VM_PKT_COMP
, 0);
965 static inline void count_recv_comp_slot(struct netvsc_device
*nvdev
, u16 q_idx
,
966 u32
*filled
, u32
*avail
)
968 struct multi_recv_comp
*mrc
= &nvdev
->chan_table
[q_idx
].mrc
;
969 u32 first
= mrc
->first
;
970 u32 next
= mrc
->next
;
972 *filled
= (first
> next
) ? NETVSC_RECVSLOT_MAX
- first
+ next
:
975 *avail
= NETVSC_RECVSLOT_MAX
- *filled
- 1;
978 /* Read the first filled slot, no change to index */
979 static inline struct recv_comp_data
*read_recv_comp_slot(struct netvsc_device
982 struct multi_recv_comp
*mrc
= &nvdev
->chan_table
[q_idx
].mrc
;
985 if (unlikely(!mrc
->buf
))
988 count_recv_comp_slot(nvdev
, q_idx
, &filled
, &avail
);
992 return mrc
->buf
+ mrc
->first
* sizeof(struct recv_comp_data
);
995 /* Put the first filled slot back to available pool */
996 static inline void put_recv_comp_slot(struct netvsc_device
*nvdev
, u16 q_idx
)
998 struct multi_recv_comp
*mrc
= &nvdev
->chan_table
[q_idx
].mrc
;
1001 mrc
->first
= (mrc
->first
+ 1) % NETVSC_RECVSLOT_MAX
;
1003 num_recv
= atomic_dec_return(&nvdev
->num_outstanding_recvs
);
1005 if (nvdev
->destroy
&& num_recv
== 0)
1006 wake_up(&nvdev
->wait_drain
);
1009 /* Check and send pending recv completions */
1010 static void netvsc_chk_recv_comp(struct netvsc_device
*nvdev
,
1011 struct vmbus_channel
*channel
, u16 q_idx
)
1013 struct recv_comp_data
*rcd
;
1017 rcd
= read_recv_comp_slot(nvdev
, q_idx
);
1021 ret
= netvsc_send_recv_completion(channel
, rcd
->tid
,
1026 put_recv_comp_slot(nvdev
, q_idx
);
1030 #define NETVSC_RCD_WATERMARK 80
1032 /* Get next available slot */
1033 static inline struct recv_comp_data
*get_recv_comp_slot(
1034 struct netvsc_device
*nvdev
, struct vmbus_channel
*channel
, u16 q_idx
)
1036 struct multi_recv_comp
*mrc
= &nvdev
->chan_table
[q_idx
].mrc
;
1037 u32 filled
, avail
, next
;
1038 struct recv_comp_data
*rcd
;
1040 if (unlikely(!nvdev
->recv_section
))
1043 if (unlikely(!mrc
->buf
))
1046 if (atomic_read(&nvdev
->num_outstanding_recvs
) >
1047 nvdev
->recv_section
->num_sub_allocs
* NETVSC_RCD_WATERMARK
/ 100)
1048 netvsc_chk_recv_comp(nvdev
, channel
, q_idx
);
1050 count_recv_comp_slot(nvdev
, q_idx
, &filled
, &avail
);
1055 rcd
= mrc
->buf
+ next
* sizeof(struct recv_comp_data
);
1056 mrc
->next
= (next
+ 1) % NETVSC_RECVSLOT_MAX
;
1058 atomic_inc(&nvdev
->num_outstanding_recvs
);
1063 static int netvsc_receive(struct net_device
*ndev
,
1064 struct netvsc_device
*net_device
,
1065 struct net_device_context
*net_device_ctx
,
1066 struct hv_device
*device
,
1067 struct vmbus_channel
*channel
,
1068 const struct vmpacket_descriptor
*desc
,
1069 struct nvsp_message
*nvsp
)
1071 const struct vmtransfer_page_packet_header
*vmxferpage_packet
1072 = container_of(desc
, const struct vmtransfer_page_packet_header
, d
);
1073 u16 q_idx
= channel
->offermsg
.offer
.sub_channel_index
;
1074 char *recv_buf
= net_device
->recv_buf
;
1075 u32 status
= NVSP_STAT_SUCCESS
;
1080 /* Make sure this is a valid nvsp packet */
1081 if (unlikely(nvsp
->hdr
.msg_type
!= NVSP_MSG1_TYPE_SEND_RNDIS_PKT
)) {
1082 netif_err(net_device_ctx
, rx_err
, ndev
,
1083 "Unknown nvsp packet type received %u\n",
1084 nvsp
->hdr
.msg_type
);
1088 if (unlikely(vmxferpage_packet
->xfer_pageset_id
!= NETVSC_RECEIVE_BUFFER_ID
)) {
1089 netif_err(net_device_ctx
, rx_err
, ndev
,
1090 "Invalid xfer page set id - expecting %x got %x\n",
1091 NETVSC_RECEIVE_BUFFER_ID
,
1092 vmxferpage_packet
->xfer_pageset_id
);
1096 count
= vmxferpage_packet
->range_cnt
;
1098 /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
1099 for (i
= 0; i
< count
; i
++) {
1100 void *data
= recv_buf
1101 + vmxferpage_packet
->ranges
[i
].byte_offset
;
1102 u32 buflen
= vmxferpage_packet
->ranges
[i
].byte_count
;
1104 /* Pass it to the upper layer */
1105 status
= rndis_filter_receive(ndev
, net_device
, device
,
1106 channel
, data
, buflen
);
1109 if (net_device
->chan_table
[q_idx
].mrc
.buf
) {
1110 struct recv_comp_data
*rcd
;
1112 rcd
= get_recv_comp_slot(net_device
, channel
, q_idx
);
1114 rcd
->tid
= vmxferpage_packet
->d
.trans_id
;
1115 rcd
->status
= status
;
1117 netdev_err(ndev
, "Recv_comp full buf q:%hd, tid:%llx\n",
1118 q_idx
, vmxferpage_packet
->d
.trans_id
);
1121 ret
= netvsc_send_recv_completion(channel
,
1122 vmxferpage_packet
->d
.trans_id
,
1125 netdev_err(ndev
, "Recv_comp q:%hd, tid:%llx, err:%d\n",
1126 q_idx
, vmxferpage_packet
->d
.trans_id
, ret
);
1131 static void netvsc_send_table(struct hv_device
*hdev
,
1132 struct nvsp_message
*nvmsg
)
1134 struct net_device
*ndev
= hv_get_drvdata(hdev
);
1135 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
1139 count
= nvmsg
->msg
.v5_msg
.send_table
.count
;
1140 if (count
!= VRSS_SEND_TAB_SIZE
) {
1141 netdev_err(ndev
, "Received wrong send-table size:%u\n", count
);
1145 tab
= (u32
*)((unsigned long)&nvmsg
->msg
.v5_msg
.send_table
+
1146 nvmsg
->msg
.v5_msg
.send_table
.offset
);
1148 for (i
= 0; i
< count
; i
++)
1149 net_device_ctx
->tx_send_table
[i
] = tab
[i
];
1152 static void netvsc_send_vf(struct net_device_context
*net_device_ctx
,
1153 struct nvsp_message
*nvmsg
)
1155 net_device_ctx
->vf_alloc
= nvmsg
->msg
.v4_msg
.vf_assoc
.allocated
;
1156 net_device_ctx
->vf_serial
= nvmsg
->msg
.v4_msg
.vf_assoc
.serial
;
1159 static inline void netvsc_receive_inband(struct hv_device
*hdev
,
1160 struct net_device_context
*net_device_ctx
,
1161 struct nvsp_message
*nvmsg
)
1163 switch (nvmsg
->hdr
.msg_type
) {
1164 case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE
:
1165 netvsc_send_table(hdev
, nvmsg
);
1168 case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION
:
1169 netvsc_send_vf(net_device_ctx
, nvmsg
);
/*
 * netvsc_process_raw_pkt - handle one descriptor taken from the channel.
 * @device:     device, forwarded to the handlers
 * @channel:    channel the descriptor was read from
 * @net_device: netvsc device state
 * @ndev:       net_device; its private area is the net_device_context
 * @desc:       descriptor; desc->type selects the handler and
 *              hv_pkt_data(desc) yields the NVSP message
 *
 * Visible dispatch on desc->type:
 *  - a call to netvsc_send_completion() (its case label is not visible);
 *  - VM_PKT_DATA_USING_XFER_PAGES returns the result of netvsc_receive();
 *  - VM_PKT_DATA_INBAND calls netvsc_receive_inband();
 *  - an error log "unhandled packet type" carrying desc->type and
 *    desc->trans_id (its label is not visible, presumably the default).
 *
 * NOTE(review): the parameter list continues past @desc in the full file
 * (the caller passes a sixth argument), and the value returned on the
 * non-receive paths is not visible here - confirm against the full file.
 */
1174 static int netvsc_process_raw_pkt(struct hv_device
*device
,
1175 struct vmbus_channel
*channel
,
1176 struct netvsc_device
*net_device
,
1177 struct net_device
*ndev
,
1178 const struct vmpacket_descriptor
*desc
,
1181 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
1182 struct nvsp_message
*nvmsg
= hv_pkt_data(desc
);
1184 switch (desc
->type
) {
1186 netvsc_send_completion(net_device
, channel
, device
,
1190 case VM_PKT_DATA_USING_XFER_PAGES
:
1191 return netvsc_receive(ndev
, net_device
, net_device_ctx
,
1192 device
, channel
, desc
, nvmsg
);
1195 case VM_PKT_DATA_INBAND
:
1196 netvsc_receive_inband(device
, net_device_ctx
, nvmsg
);
1200 netdev_err(ndev
, "unhandled packet type %d, tid %llx\n",
1201 desc
->type
, desc
->trans_id
);
/*
 * netvsc_channel_to_device - find the hv_device a channel belongs to.
 * @channel: channel to look up
 *
 * Returns the device_obj of channel->primary_channel when that pointer is
 * non-NULL, otherwise the device_obj of @channel itself.
 */
1208 static struct hv_device
*netvsc_channel_to_device(struct vmbus_channel
*channel
)
1210 struct vmbus_channel
*primary
= channel
->primary_channel
;
1212 return primary
? primary
->device_obj
: channel
->device_obj
;
/*
 * netvsc_poll - NAPI poll handler for one netvsc channel.
 * @napi:   napi_struct embedded in a struct netvsc_channel (recovered
 *          with container_of)
 * @budget: upper bound on the work reported back
 *
 * Walks the channel's descriptors with hv_pkt_iter_first() and
 * hv_pkt_iter_next(), adding the result of netvsc_process_raw_pkt() to
 * work_done, until the iterator yields NULL or work_done reaches @budget.
 * The current descriptor is kept in nvchan->desc.
 *
 * When work_done is below @budget, napi_complete_done() returns true and
 * hv_end_read() on the inbound ring returns non-zero, the NAPI instance is
 * rescheduled.  netvsc_chk_recv_comp() is then called for the queue index
 * taken from the channel's sub_channel_index.
 *
 * Returns min(work_done, budget).
 *
 * NOTE(review): the declaration of work_done and the condition guarding
 * the hv_pkt_iter_first() call are not visible in this listing - confirm
 * against the full file.
 */
1215 /* Network processing softirq
1216 * Process data in incoming ring buffer from host
1217 * Stops when ring is empty or budget is met or exceeded.
1219 int netvsc_poll(struct napi_struct
*napi
, int budget
)
1221 struct netvsc_channel
*nvchan
1222 = container_of(napi
, struct netvsc_channel
, napi
);
1223 struct vmbus_channel
*channel
= nvchan
->channel
;
1224 struct hv_device
*device
= netvsc_channel_to_device(channel
);
1225 u16 q_idx
= channel
->offermsg
.offer
.sub_channel_index
;
1226 struct net_device
*ndev
= hv_get_drvdata(device
);
1227 struct netvsc_device
*net_device
= net_device_to_netvsc_device(ndev
);
1230 /* If starting a new interval */
1232 nvchan
->desc
= hv_pkt_iter_first(channel
);
/* Process descriptors until none remain or the budget is reached. */
1234 while (nvchan
->desc
&& work_done
< budget
) {
1235 work_done
+= netvsc_process_raw_pkt(device
, channel
, net_device
,
1236 ndev
, nvchan
->desc
, budget
);
1237 nvchan
->desc
= hv_pkt_iter_next(channel
, nvchan
->desc
);
1240 /* If receive ring was exhausted
1241 * and not doing busy poll
1242 * then re-enable host interrupts
1243 * and reschedule if ring is not empty.
1245 if (work_done
< budget
&&
1246 napi_complete_done(napi
, work_done
) &&
1247 hv_end_read(&channel
->inbound
) != 0)
1248 napi_reschedule(napi
);
1250 netvsc_chk_recv_comp(net_device
, channel
, q_idx
);
1252 /* Driver may overshoot since multiple packets per descriptor */
1253 return min(work_done
, budget
);
/*
 * netvsc_channel_cb - channel callback that hands work to NAPI.
 * @context: the struct netvsc_channel for the signalling channel
 *
 * When napi_schedule_prep() succeeds for the channel's NAPI instance, calls
 * hv_begin_read() on the channel's inbound ring and then __napi_schedule().
 * Nothing is done when napi_schedule_prep() returns false.
 */
1256 /* Call back when data is available in host ring buffer.
1257 * Processing is deferred until network softirq (NAPI)
1259 void netvsc_channel_cb(void *context
)
1261 struct netvsc_channel
*nvchan
= context
;
1263 if (napi_schedule_prep(&nvchan
->napi
)) {
1264 /* disable interrupts from host */
1265 hv_begin_read(&nvchan
->channel
->inbound
);
1267 __napi_schedule(&nvchan
->napi
);
/*
 * Summary of the visible steps of netvsc_device_add():
 * @device:      hv_device being added; its driver data is the net_device
 * @device_info: supplies ring_size
 *
 *  1. Allocate a netvsc_device with alloc_net_device() and store ring_size.
 *  2. Set the primary channel's read mode to HV_CALL_ISR.
 *  3. Point every chan_table entry (VRSS_CHANNEL_MAX of them) at the
 *     primary channel and initialise its tx/rx stats sync objects.
 *  4. Register netvsc_poll as the NAPI handler for chan_table[0].
 *  5. Open the channel with vmbus_open(), passing ring_size * PAGE_SIZE
 *     for both size arguments and chan_table as the last visible argument.
 *  6. Enable NAPI, publish net_device through
 *     rcu_assign_pointer(net_device_ctx->nvdev, ...), then call
 *     netvsc_connect_vsp().
 *  7. Teardown code: netif_napi_del(), vmbus_close(), and
 *     free_netvsc_device(&net_device->rcu).
 *
 * NOTE(review): the error checks, labels/gotos, return statements and the
 * declarations of i and ret are not visible in this listing, so which
 * teardown steps run on which failure cannot be stated from here - confirm
 * against the full file.
 */
1272 * netvsc_device_add - Callback when the device belonging to this
1275 int netvsc_device_add(struct hv_device
*device
,
1276 const struct netvsc_device_info
*device_info
)
1279 int ring_size
= device_info
->ring_size
;
1280 struct netvsc_device
*net_device
;
1281 struct net_device
*ndev
= hv_get_drvdata(device
);
1282 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
1284 net_device
= alloc_net_device();
1288 net_device
->ring_size
= ring_size
;
1290 /* Because the device uses NAPI, all the interrupt batching and
1291 * control is done via Net softirq, not the channel handling
1293 set_channel_read_mode(device
->channel
, HV_CALL_ISR
);
1295 /* If we're reopening the device we may have multiple queues, fill the
1296 * chn_table with the default channel to use it before subchannels are
1298 * Initialize the channel state before we open;
1299 * we can be interrupted as soon as we open the channel.
1302 for (i
= 0; i
< VRSS_CHANNEL_MAX
; i
++) {
1303 struct netvsc_channel
*nvchan
= &net_device
->chan_table
[i
];
1305 nvchan
->channel
= device
->channel
;
1306 u64_stats_init(&nvchan
->tx_stats
.syncp
);
1307 u64_stats_init(&nvchan
->rx_stats
.syncp
);
1310 /* Enable NAPI handler before init callbacks */
1311 netif_napi_add(ndev
, &net_device
->chan_table
[0].napi
,
1312 netvsc_poll
, NAPI_POLL_WEIGHT
);
1314 /* Open the channel */
1315 ret
= vmbus_open(device
->channel
, ring_size
* PAGE_SIZE
,
1316 ring_size
* PAGE_SIZE
, NULL
, 0,
1318 net_device
->chan_table
);
/* NOTE(review): the check guarding this failure handling is not visible here. */
1321 netif_napi_del(&net_device
->chan_table
[0].napi
);
1322 netdev_err(ndev
, "unable to open channel: %d\n", ret
);
1326 /* Channel is opened */
1327 netdev_dbg(ndev
, "hv_netvsc channel opened successfully\n");
1329 napi_enable(&net_device
->chan_table
[0].napi
);
1331 /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
1334 rcu_assign_pointer(net_device_ctx
->nvdev
, net_device
);
1336 /* Connect with the NetVsp */
1337 ret
= netvsc_connect_vsp(device
, net_device
);
1340 "unable to connect to NetVSP - %d\n", ret
);
/* Teardown: remove the NAPI instance, close the channel, free the device. */
1347 netif_napi_del(&net_device
->chan_table
[0].napi
);
1349 /* Now, we can close the channel safely */
1350 vmbus_close(device
->channel
);
1353 free_netvsc_device(&net_device
->rcu
);