2 * Copyright (c) 2009, Microsoft Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, see <http://www.gnu.org/licenses/>.
17 * Haiyang Zhang <haiyangz@microsoft.com>
18 * Hank Janssen <hjanssen@microsoft.com>
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 #include <linux/kernel.h>
23 #include <linux/sched.h>
24 #include <linux/wait.h>
26 #include <linux/delay.h>
28 #include <linux/slab.h>
29 #include <linux/netdevice.h>
30 #include <linux/if_ether.h>
31 #include <linux/vmalloc.h>
32 #include <asm/sync_bitops.h>
34 #include "hyperv_net.h"
37 * Switch the data path from the synthetic interface to the VF
40 void netvsc_switch_datapath(struct net_device
*ndev
, bool vf
)
42 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
43 struct hv_device
*dev
= net_device_ctx
->device_ctx
;
44 struct netvsc_device
*nv_dev
= net_device_ctx
->nvdev
;
45 struct nvsp_message
*init_pkt
= &nv_dev
->channel_init_pkt
;
47 memset(init_pkt
, 0, sizeof(struct nvsp_message
));
48 init_pkt
->hdr
.msg_type
= NVSP_MSG4_TYPE_SWITCH_DATA_PATH
;
50 init_pkt
->msg
.v4_msg
.active_dp
.active_datapath
=
53 init_pkt
->msg
.v4_msg
.active_dp
.active_datapath
=
54 NVSP_DATAPATH_SYNTHETIC
;
56 vmbus_sendpacket(dev
->channel
, init_pkt
,
57 sizeof(struct nvsp_message
),
58 (unsigned long)init_pkt
,
59 VM_PKT_DATA_INBAND
, 0);
62 static struct netvsc_device
*alloc_net_device(void)
64 struct netvsc_device
*net_device
;
66 net_device
= kzalloc(sizeof(struct netvsc_device
), GFP_KERNEL
);
70 net_device
->cb_buffer
= kzalloc(NETVSC_PACKET_SIZE
, GFP_KERNEL
);
71 if (!net_device
->cb_buffer
) {
76 net_device
->mrc
[0].buf
= vzalloc(NETVSC_RECVSLOT_MAX
*
77 sizeof(struct recv_comp_data
));
79 init_waitqueue_head(&net_device
->wait_drain
);
80 net_device
->destroy
= false;
81 atomic_set(&net_device
->open_cnt
, 0);
82 net_device
->max_pkt
= RNDIS_MAX_PKT_DEFAULT
;
83 net_device
->pkt_align
= RNDIS_PKT_ALIGN_DEFAULT
;
84 init_completion(&net_device
->channel_init_wait
);
89 static void free_netvsc_device(struct netvsc_device
*nvdev
)
93 for (i
= 0; i
< VRSS_CHANNEL_MAX
; i
++)
94 vfree(nvdev
->mrc
[i
].buf
);
96 kfree(nvdev
->cb_buffer
);
100 static struct netvsc_device
*get_outbound_net_device(struct hv_device
*device
)
102 struct netvsc_device
*net_device
= hv_device_to_netvsc_device(device
);
104 if (net_device
&& net_device
->destroy
)
110 static struct netvsc_device
*get_inbound_net_device(struct hv_device
*device
)
112 struct netvsc_device
*net_device
= hv_device_to_netvsc_device(device
);
117 if (net_device
->destroy
&&
118 atomic_read(&net_device
->num_outstanding_sends
) == 0 &&
119 atomic_read(&net_device
->num_outstanding_recvs
) == 0)
126 static void netvsc_destroy_buf(struct hv_device
*device
)
128 struct nvsp_message
*revoke_packet
;
129 struct net_device
*ndev
= hv_get_drvdata(device
);
130 struct netvsc_device
*net_device
= net_device_to_netvsc_device(ndev
);
134 * If we got a section count, it means we received a
135 * SendReceiveBufferComplete msg (ie sent
136 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
137 * to send a revoke msg here
139 if (net_device
->recv_section_cnt
) {
140 /* Send the revoke receive buffer */
141 revoke_packet
= &net_device
->revoke_packet
;
142 memset(revoke_packet
, 0, sizeof(struct nvsp_message
));
144 revoke_packet
->hdr
.msg_type
=
145 NVSP_MSG1_TYPE_REVOKE_RECV_BUF
;
146 revoke_packet
->msg
.v1_msg
.
147 revoke_recv_buf
.id
= NETVSC_RECEIVE_BUFFER_ID
;
149 ret
= vmbus_sendpacket(device
->channel
,
151 sizeof(struct nvsp_message
),
152 (unsigned long)revoke_packet
,
153 VM_PKT_DATA_INBAND
, 0);
155 * If we failed here, we might as well return and
156 * have a leak rather than continue and a bugchk
159 netdev_err(ndev
, "unable to send "
160 "revoke receive buffer to netvsp\n");
165 /* Teardown the gpadl on the vsp end */
166 if (net_device
->recv_buf_gpadl_handle
) {
167 ret
= vmbus_teardown_gpadl(device
->channel
,
168 net_device
->recv_buf_gpadl_handle
);
170 /* If we failed here, we might as well return and have a leak
171 * rather than continue and a bugchk
175 "unable to teardown receive buffer's gpadl\n");
178 net_device
->recv_buf_gpadl_handle
= 0;
181 if (net_device
->recv_buf
) {
182 /* Free up the receive buffer */
183 vfree(net_device
->recv_buf
);
184 net_device
->recv_buf
= NULL
;
187 if (net_device
->recv_section
) {
188 net_device
->recv_section_cnt
= 0;
189 kfree(net_device
->recv_section
);
190 net_device
->recv_section
= NULL
;
193 /* Deal with the send buffer we may have setup.
194 * If we got a send section size, it means we received a
195 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
196 * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
197 * to send a revoke msg here
199 if (net_device
->send_section_size
) {
200 /* Send the revoke receive buffer */
201 revoke_packet
= &net_device
->revoke_packet
;
202 memset(revoke_packet
, 0, sizeof(struct nvsp_message
));
204 revoke_packet
->hdr
.msg_type
=
205 NVSP_MSG1_TYPE_REVOKE_SEND_BUF
;
206 revoke_packet
->msg
.v1_msg
.revoke_send_buf
.id
=
207 NETVSC_SEND_BUFFER_ID
;
209 ret
= vmbus_sendpacket(device
->channel
,
211 sizeof(struct nvsp_message
),
212 (unsigned long)revoke_packet
,
213 VM_PKT_DATA_INBAND
, 0);
214 /* If we failed here, we might as well return and
215 * have a leak rather than continue and a bugchk
218 netdev_err(ndev
, "unable to send "
219 "revoke send buffer to netvsp\n");
223 /* Teardown the gpadl on the vsp end */
224 if (net_device
->send_buf_gpadl_handle
) {
225 ret
= vmbus_teardown_gpadl(device
->channel
,
226 net_device
->send_buf_gpadl_handle
);
228 /* If we failed here, we might as well return and have a leak
229 * rather than continue and a bugchk
233 "unable to teardown send buffer's gpadl\n");
236 net_device
->send_buf_gpadl_handle
= 0;
238 if (net_device
->send_buf
) {
239 /* Free up the send buffer */
240 vfree(net_device
->send_buf
);
241 net_device
->send_buf
= NULL
;
243 kfree(net_device
->send_section_map
);
246 static int netvsc_init_buf(struct hv_device
*device
)
249 struct netvsc_device
*net_device
;
250 struct nvsp_message
*init_packet
;
251 struct net_device
*ndev
;
254 net_device
= get_outbound_net_device(device
);
257 ndev
= hv_get_drvdata(device
);
259 node
= cpu_to_node(device
->channel
->target_cpu
);
260 net_device
->recv_buf
= vzalloc_node(net_device
->recv_buf_size
, node
);
261 if (!net_device
->recv_buf
)
262 net_device
->recv_buf
= vzalloc(net_device
->recv_buf_size
);
264 if (!net_device
->recv_buf
) {
265 netdev_err(ndev
, "unable to allocate receive "
266 "buffer of size %d\n", net_device
->recv_buf_size
);
272 * Establish the gpadl handle for this buffer on this
273 * channel. Note: This call uses the vmbus connection rather
274 * than the channel to establish the gpadl handle.
276 ret
= vmbus_establish_gpadl(device
->channel
, net_device
->recv_buf
,
277 net_device
->recv_buf_size
,
278 &net_device
->recv_buf_gpadl_handle
);
281 "unable to establish receive buffer's gpadl\n");
285 /* Notify the NetVsp of the gpadl handle */
286 init_packet
= &net_device
->channel_init_pkt
;
288 memset(init_packet
, 0, sizeof(struct nvsp_message
));
290 init_packet
->hdr
.msg_type
= NVSP_MSG1_TYPE_SEND_RECV_BUF
;
291 init_packet
->msg
.v1_msg
.send_recv_buf
.
292 gpadl_handle
= net_device
->recv_buf_gpadl_handle
;
293 init_packet
->msg
.v1_msg
.
294 send_recv_buf
.id
= NETVSC_RECEIVE_BUFFER_ID
;
296 /* Send the gpadl notification request */
297 ret
= vmbus_sendpacket(device
->channel
, init_packet
,
298 sizeof(struct nvsp_message
),
299 (unsigned long)init_packet
,
301 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
);
304 "unable to send receive buffer's gpadl to netvsp\n");
308 wait_for_completion(&net_device
->channel_init_wait
);
310 /* Check the response */
311 if (init_packet
->msg
.v1_msg
.
312 send_recv_buf_complete
.status
!= NVSP_STAT_SUCCESS
) {
313 netdev_err(ndev
, "Unable to complete receive buffer "
314 "initialization with NetVsp - status %d\n",
315 init_packet
->msg
.v1_msg
.
316 send_recv_buf_complete
.status
);
321 /* Parse the response */
323 net_device
->recv_section_cnt
= init_packet
->msg
.
324 v1_msg
.send_recv_buf_complete
.num_sections
;
326 net_device
->recv_section
= kmemdup(
327 init_packet
->msg
.v1_msg
.send_recv_buf_complete
.sections
,
328 net_device
->recv_section_cnt
*
329 sizeof(struct nvsp_1_receive_buffer_section
),
331 if (net_device
->recv_section
== NULL
) {
337 * For 1st release, there should only be 1 section that represents the
338 * entire receive buffer
340 if (net_device
->recv_section_cnt
!= 1 ||
341 net_device
->recv_section
->offset
!= 0) {
346 /* Now setup the send buffer.
348 net_device
->send_buf
= vzalloc_node(net_device
->send_buf_size
, node
);
349 if (!net_device
->send_buf
)
350 net_device
->send_buf
= vzalloc(net_device
->send_buf_size
);
351 if (!net_device
->send_buf
) {
352 netdev_err(ndev
, "unable to allocate send "
353 "buffer of size %d\n", net_device
->send_buf_size
);
358 /* Establish the gpadl handle for this buffer on this
359 * channel. Note: This call uses the vmbus connection rather
360 * than the channel to establish the gpadl handle.
362 ret
= vmbus_establish_gpadl(device
->channel
, net_device
->send_buf
,
363 net_device
->send_buf_size
,
364 &net_device
->send_buf_gpadl_handle
);
367 "unable to establish send buffer's gpadl\n");
371 /* Notify the NetVsp of the gpadl handle */
372 init_packet
= &net_device
->channel_init_pkt
;
373 memset(init_packet
, 0, sizeof(struct nvsp_message
));
374 init_packet
->hdr
.msg_type
= NVSP_MSG1_TYPE_SEND_SEND_BUF
;
375 init_packet
->msg
.v1_msg
.send_send_buf
.gpadl_handle
=
376 net_device
->send_buf_gpadl_handle
;
377 init_packet
->msg
.v1_msg
.send_send_buf
.id
= NETVSC_SEND_BUFFER_ID
;
379 /* Send the gpadl notification request */
380 ret
= vmbus_sendpacket(device
->channel
, init_packet
,
381 sizeof(struct nvsp_message
),
382 (unsigned long)init_packet
,
384 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
);
387 "unable to send send buffer's gpadl to netvsp\n");
391 wait_for_completion(&net_device
->channel_init_wait
);
393 /* Check the response */
394 if (init_packet
->msg
.v1_msg
.
395 send_send_buf_complete
.status
!= NVSP_STAT_SUCCESS
) {
396 netdev_err(ndev
, "Unable to complete send buffer "
397 "initialization with NetVsp - status %d\n",
398 init_packet
->msg
.v1_msg
.
399 send_send_buf_complete
.status
);
404 /* Parse the response */
405 net_device
->send_section_size
= init_packet
->msg
.
406 v1_msg
.send_send_buf_complete
.section_size
;
408 /* Section count is simply the size divided by the section size.
410 net_device
->send_section_cnt
=
411 net_device
->send_buf_size
/ net_device
->send_section_size
;
413 dev_info(&device
->device
, "Send section size: %d, Section count:%d\n",
414 net_device
->send_section_size
, net_device
->send_section_cnt
);
416 /* Setup state for managing the send buffer. */
417 net_device
->map_words
= DIV_ROUND_UP(net_device
->send_section_cnt
,
420 net_device
->send_section_map
= kcalloc(net_device
->map_words
,
421 sizeof(ulong
), GFP_KERNEL
);
422 if (net_device
->send_section_map
== NULL
) {
430 netvsc_destroy_buf(device
);
436 /* Negotiate NVSP protocol version */
437 static int negotiate_nvsp_ver(struct hv_device
*device
,
438 struct netvsc_device
*net_device
,
439 struct nvsp_message
*init_packet
,
442 struct net_device
*ndev
= hv_get_drvdata(device
);
445 memset(init_packet
, 0, sizeof(struct nvsp_message
));
446 init_packet
->hdr
.msg_type
= NVSP_MSG_TYPE_INIT
;
447 init_packet
->msg
.init_msg
.init
.min_protocol_ver
= nvsp_ver
;
448 init_packet
->msg
.init_msg
.init
.max_protocol_ver
= nvsp_ver
;
450 /* Send the init request */
451 ret
= vmbus_sendpacket(device
->channel
, init_packet
,
452 sizeof(struct nvsp_message
),
453 (unsigned long)init_packet
,
455 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
);
460 wait_for_completion(&net_device
->channel_init_wait
);
462 if (init_packet
->msg
.init_msg
.init_complete
.status
!=
466 if (nvsp_ver
== NVSP_PROTOCOL_VERSION_1
)
469 /* NVSPv2 or later: Send NDIS config */
470 memset(init_packet
, 0, sizeof(struct nvsp_message
));
471 init_packet
->hdr
.msg_type
= NVSP_MSG2_TYPE_SEND_NDIS_CONFIG
;
472 init_packet
->msg
.v2_msg
.send_ndis_config
.mtu
= ndev
->mtu
+ ETH_HLEN
;
473 init_packet
->msg
.v2_msg
.send_ndis_config
.capability
.ieee8021q
= 1;
475 if (nvsp_ver
>= NVSP_PROTOCOL_VERSION_5
) {
476 init_packet
->msg
.v2_msg
.send_ndis_config
.capability
.sriov
= 1;
478 /* Teaming bit is needed to receive link speed updates */
479 init_packet
->msg
.v2_msg
.send_ndis_config
.capability
.teaming
= 1;
482 ret
= vmbus_sendpacket(device
->channel
, init_packet
,
483 sizeof(struct nvsp_message
),
484 (unsigned long)init_packet
,
485 VM_PKT_DATA_INBAND
, 0);
490 static int netvsc_connect_vsp(struct hv_device
*device
)
493 struct netvsc_device
*net_device
;
494 struct nvsp_message
*init_packet
;
496 const u32 ver_list
[] = {
497 NVSP_PROTOCOL_VERSION_1
, NVSP_PROTOCOL_VERSION_2
,
498 NVSP_PROTOCOL_VERSION_4
, NVSP_PROTOCOL_VERSION_5
};
501 net_device
= get_outbound_net_device(device
);
505 init_packet
= &net_device
->channel_init_pkt
;
507 /* Negotiate the latest NVSP protocol supported */
508 for (i
= ARRAY_SIZE(ver_list
) - 1; i
>= 0; i
--)
509 if (negotiate_nvsp_ver(device
, net_device
, init_packet
,
511 net_device
->nvsp_version
= ver_list
[i
];
520 pr_debug("Negotiated NVSP version:%x\n", net_device
->nvsp_version
);
522 /* Send the ndis version */
523 memset(init_packet
, 0, sizeof(struct nvsp_message
));
525 if (net_device
->nvsp_version
<= NVSP_PROTOCOL_VERSION_4
)
526 ndis_version
= 0x00060001;
528 ndis_version
= 0x0006001e;
530 init_packet
->hdr
.msg_type
= NVSP_MSG1_TYPE_SEND_NDIS_VER
;
531 init_packet
->msg
.v1_msg
.
532 send_ndis_ver
.ndis_major_ver
=
533 (ndis_version
& 0xFFFF0000) >> 16;
534 init_packet
->msg
.v1_msg
.
535 send_ndis_ver
.ndis_minor_ver
=
536 ndis_version
& 0xFFFF;
538 /* Send the init request */
539 ret
= vmbus_sendpacket(device
->channel
, init_packet
,
540 sizeof(struct nvsp_message
),
541 (unsigned long)init_packet
,
542 VM_PKT_DATA_INBAND
, 0);
546 /* Post the big receive buffer to NetVSP */
547 if (net_device
->nvsp_version
<= NVSP_PROTOCOL_VERSION_2
)
548 net_device
->recv_buf_size
= NETVSC_RECEIVE_BUFFER_SIZE_LEGACY
;
550 net_device
->recv_buf_size
= NETVSC_RECEIVE_BUFFER_SIZE
;
551 net_device
->send_buf_size
= NETVSC_SEND_BUFFER_SIZE
;
553 ret
= netvsc_init_buf(device
);
559 static void netvsc_disconnect_vsp(struct hv_device
*device
)
561 netvsc_destroy_buf(device
);
565 * netvsc_device_remove - Callback when the root bus device is removed
567 void netvsc_device_remove(struct hv_device
*device
)
569 struct net_device
*ndev
= hv_get_drvdata(device
);
570 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
571 struct netvsc_device
*net_device
= net_device_ctx
->nvdev
;
573 netvsc_disconnect_vsp(device
);
575 net_device_ctx
->nvdev
= NULL
;
578 * At this point, no one should be accessing net_device
581 dev_notice(&device
->device
, "net device safe to remove\n");
583 /* Now, we can close the channel safely */
584 vmbus_close(device
->channel
);
586 /* Release all resources */
587 vfree(net_device
->sub_cb_buf
);
588 free_netvsc_device(net_device
);
591 #define RING_AVAIL_PERCENT_HIWATER 20
592 #define RING_AVAIL_PERCENT_LOWATER 10
595 * Get the percentage of available bytes to write in the ring.
596 * The return value is in range from 0 to 100.
598 static inline u32
hv_ringbuf_avail_percent(
599 struct hv_ring_buffer_info
*ring_info
)
601 u32 avail_read
, avail_write
;
603 hv_get_ringbuffer_availbytes(ring_info
, &avail_read
, &avail_write
);
605 return avail_write
* 100 / ring_info
->ring_datasize
;
608 static inline void netvsc_free_send_slot(struct netvsc_device
*net_device
,
611 sync_change_bit(index
, net_device
->send_section_map
);
614 static void netvsc_send_tx_complete(struct netvsc_device
*net_device
,
615 struct vmbus_channel
*incoming_channel
,
616 struct hv_device
*device
,
617 struct vmpacket_descriptor
*packet
)
619 struct sk_buff
*skb
= (struct sk_buff
*)(unsigned long)packet
->trans_id
;
620 struct net_device
*ndev
= hv_get_drvdata(device
);
621 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
622 struct vmbus_channel
*channel
= device
->channel
;
623 int num_outstanding_sends
;
627 /* Notify the layer above us */
629 struct hv_netvsc_packet
*nvsc_packet
630 = (struct hv_netvsc_packet
*)skb
->cb
;
631 u32 send_index
= nvsc_packet
->send_buf_index
;
633 if (send_index
!= NETVSC_INVALID_INDEX
)
634 netvsc_free_send_slot(net_device
, send_index
);
635 q_idx
= nvsc_packet
->q_idx
;
636 channel
= incoming_channel
;
638 dev_consume_skb_any(skb
);
641 num_outstanding_sends
=
642 atomic_dec_return(&net_device
->num_outstanding_sends
);
643 queue_sends
= atomic_dec_return(&net_device
->queue_sends
[q_idx
]);
645 if (net_device
->destroy
&& num_outstanding_sends
== 0)
646 wake_up(&net_device
->wait_drain
);
648 if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev
, q_idx
)) &&
649 !net_device_ctx
->start_remove
&&
650 (hv_ringbuf_avail_percent(&channel
->outbound
) > RING_AVAIL_PERCENT_HIWATER
||
652 netif_tx_wake_queue(netdev_get_tx_queue(ndev
, q_idx
));
655 static void netvsc_send_completion(struct netvsc_device
*net_device
,
656 struct vmbus_channel
*incoming_channel
,
657 struct hv_device
*device
,
658 struct vmpacket_descriptor
*packet
)
660 struct nvsp_message
*nvsp_packet
;
661 struct net_device
*ndev
= hv_get_drvdata(device
);
663 nvsp_packet
= (struct nvsp_message
*)((unsigned long)packet
+
664 (packet
->offset8
<< 3));
666 switch (nvsp_packet
->hdr
.msg_type
) {
667 case NVSP_MSG_TYPE_INIT_COMPLETE
:
668 case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE
:
669 case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE
:
670 case NVSP_MSG5_TYPE_SUBCHANNEL
:
671 /* Copy the response back */
672 memcpy(&net_device
->channel_init_pkt
, nvsp_packet
,
673 sizeof(struct nvsp_message
));
674 complete(&net_device
->channel_init_wait
);
677 case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE
:
678 netvsc_send_tx_complete(net_device
, incoming_channel
,
684 "Unknown send completion type %d received!!\n",
685 nvsp_packet
->hdr
.msg_type
);
689 static u32
netvsc_get_next_send_section(struct netvsc_device
*net_device
)
692 u32 max_words
= net_device
->map_words
;
693 unsigned long *map_addr
= (unsigned long *)net_device
->send_section_map
;
694 u32 section_cnt
= net_device
->send_section_cnt
;
695 int ret_val
= NETVSC_INVALID_INDEX
;
699 for (i
= 0; i
< max_words
; i
++) {
702 index
= ffz(map_addr
[i
]);
703 prev_val
= sync_test_and_set_bit(index
, &map_addr
[i
]);
706 if ((index
+ (i
* BITS_PER_LONG
)) >= section_cnt
)
708 ret_val
= (index
+ (i
* BITS_PER_LONG
));
714 static u32
netvsc_copy_to_send_buf(struct netvsc_device
*net_device
,
715 unsigned int section_index
,
717 struct hv_netvsc_packet
*packet
,
718 struct rndis_message
*rndis_msg
,
719 struct hv_page_buffer
**pb
,
722 char *start
= net_device
->send_buf
;
723 char *dest
= start
+ (section_index
* net_device
->send_section_size
)
726 bool is_data_pkt
= (skb
!= NULL
) ? true : false;
727 bool xmit_more
= (skb
!= NULL
) ? skb
->xmit_more
: false;
730 u32 remain
= packet
->total_data_buflen
% net_device
->pkt_align
;
731 u32 page_count
= packet
->cp_partial
? packet
->rmsg_pgcnt
:
732 packet
->page_buf_cnt
;
735 if (is_data_pkt
&& xmit_more
&& remain
&&
736 !packet
->cp_partial
) {
737 padding
= net_device
->pkt_align
- remain
;
738 rndis_msg
->msg_len
+= padding
;
739 packet
->total_data_buflen
+= padding
;
742 for (i
= 0; i
< page_count
; i
++) {
743 char *src
= phys_to_virt((*pb
)[i
].pfn
<< PAGE_SHIFT
);
744 u32 offset
= (*pb
)[i
].offset
;
745 u32 len
= (*pb
)[i
].len
;
747 memcpy(dest
, (src
+ offset
), len
);
753 memset(dest
, 0, padding
);
760 static inline int netvsc_send_pkt(
761 struct hv_device
*device
,
762 struct hv_netvsc_packet
*packet
,
763 struct netvsc_device
*net_device
,
764 struct hv_page_buffer
**pb
,
767 struct nvsp_message nvmsg
;
768 u16 q_idx
= packet
->q_idx
;
769 struct vmbus_channel
*out_channel
= net_device
->chn_table
[q_idx
];
770 struct net_device
*ndev
= hv_get_drvdata(device
);
773 struct hv_page_buffer
*pgbuf
;
774 u32 ring_avail
= hv_ringbuf_avail_percent(&out_channel
->outbound
);
775 bool xmit_more
= (skb
!= NULL
) ? skb
->xmit_more
: false;
777 nvmsg
.hdr
.msg_type
= NVSP_MSG1_TYPE_SEND_RNDIS_PKT
;
780 nvmsg
.msg
.v1_msg
.send_rndis_pkt
.channel_type
= 0;
782 /* 1 is RMC_CONTROL; */
783 nvmsg
.msg
.v1_msg
.send_rndis_pkt
.channel_type
= 1;
786 nvmsg
.msg
.v1_msg
.send_rndis_pkt
.send_buf_section_index
=
787 packet
->send_buf_index
;
788 if (packet
->send_buf_index
== NETVSC_INVALID_INDEX
)
789 nvmsg
.msg
.v1_msg
.send_rndis_pkt
.send_buf_section_size
= 0;
791 nvmsg
.msg
.v1_msg
.send_rndis_pkt
.send_buf_section_size
=
792 packet
->total_data_buflen
;
796 if (out_channel
->rescind
)
800 * It is possible that once we successfully place this packet
801 * on the ringbuffer, we may stop the queue. In that case, we want
802 * to notify the host independent of the xmit_more flag. We don't
803 * need to be precise here; in the worst case we may signal the host
806 if (ring_avail
< (RING_AVAIL_PERCENT_LOWATER
+ 1))
809 if (packet
->page_buf_cnt
) {
810 pgbuf
= packet
->cp_partial
? (*pb
) +
811 packet
->rmsg_pgcnt
: (*pb
);
812 ret
= vmbus_sendpacket_pagebuffer_ctl(out_channel
,
814 packet
->page_buf_cnt
,
816 sizeof(struct nvsp_message
),
818 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
,
821 ret
= vmbus_sendpacket_ctl(out_channel
, &nvmsg
,
822 sizeof(struct nvsp_message
),
825 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
,
830 atomic_inc(&net_device
->num_outstanding_sends
);
831 atomic_inc(&net_device
->queue_sends
[q_idx
]);
833 if (ring_avail
< RING_AVAIL_PERCENT_LOWATER
) {
834 netif_tx_stop_queue(netdev_get_tx_queue(ndev
, q_idx
));
836 if (atomic_read(&net_device
->
837 queue_sends
[q_idx
]) < 1)
838 netif_tx_wake_queue(netdev_get_tx_queue(
841 } else if (ret
== -EAGAIN
) {
842 netif_tx_stop_queue(netdev_get_tx_queue(
844 if (atomic_read(&net_device
->queue_sends
[q_idx
]) < 1) {
845 netif_tx_wake_queue(netdev_get_tx_queue(
850 netdev_err(ndev
, "Unable to send packet %p ret %d\n",
857 /* Move packet out of multi send data (msd), and clear msd */
858 static inline void move_pkt_msd(struct hv_netvsc_packet
**msd_send
,
859 struct sk_buff
**msd_skb
,
860 struct multi_send_data
*msdp
)
862 *msd_skb
= msdp
->skb
;
863 *msd_send
= msdp
->pkt
;
869 int netvsc_send(struct hv_device
*device
,
870 struct hv_netvsc_packet
*packet
,
871 struct rndis_message
*rndis_msg
,
872 struct hv_page_buffer
**pb
,
875 struct netvsc_device
*net_device
;
877 struct vmbus_channel
*out_channel
;
878 u16 q_idx
= packet
->q_idx
;
879 u32 pktlen
= packet
->total_data_buflen
, msd_len
= 0;
880 unsigned int section_index
= NETVSC_INVALID_INDEX
;
881 struct multi_send_data
*msdp
;
882 struct hv_netvsc_packet
*msd_send
= NULL
, *cur_send
= NULL
;
883 struct sk_buff
*msd_skb
= NULL
;
885 bool xmit_more
= (skb
!= NULL
) ? skb
->xmit_more
: false;
887 net_device
= get_outbound_net_device(device
);
891 out_channel
= net_device
->chn_table
[q_idx
];
893 packet
->send_buf_index
= NETVSC_INVALID_INDEX
;
894 packet
->cp_partial
= false;
896 /* Send control message directly without accessing msd (Multi-Send
897 * Data) field which may be changed during data packet processing.
904 msdp
= &net_device
->msd
[q_idx
];
906 /* batch packets in send buffer if possible */
908 msd_len
= msdp
->pkt
->total_data_buflen
;
910 try_batch
= (skb
!= NULL
) && msd_len
> 0 && msdp
->count
<
913 if (try_batch
&& msd_len
+ pktlen
+ net_device
->pkt_align
<
914 net_device
->send_section_size
) {
915 section_index
= msdp
->pkt
->send_buf_index
;
917 } else if (try_batch
&& msd_len
+ packet
->rmsg_size
<
918 net_device
->send_section_size
) {
919 section_index
= msdp
->pkt
->send_buf_index
;
920 packet
->cp_partial
= true;
922 } else if ((skb
!= NULL
) && pktlen
+ net_device
->pkt_align
<
923 net_device
->send_section_size
) {
924 section_index
= netvsc_get_next_send_section(net_device
);
925 if (section_index
!= NETVSC_INVALID_INDEX
) {
926 move_pkt_msd(&msd_send
, &msd_skb
, msdp
);
931 if (section_index
!= NETVSC_INVALID_INDEX
) {
932 netvsc_copy_to_send_buf(net_device
,
933 section_index
, msd_len
,
934 packet
, rndis_msg
, pb
, skb
);
936 packet
->send_buf_index
= section_index
;
938 if (packet
->cp_partial
) {
939 packet
->page_buf_cnt
-= packet
->rmsg_pgcnt
;
940 packet
->total_data_buflen
= msd_len
+ packet
->rmsg_size
;
942 packet
->page_buf_cnt
= 0;
943 packet
->total_data_buflen
+= msd_len
;
947 dev_consume_skb_any(msdp
->skb
);
949 if (xmit_more
&& !packet
->cp_partial
) {
960 move_pkt_msd(&msd_send
, &msd_skb
, msdp
);
965 int m_ret
= netvsc_send_pkt(device
, msd_send
, net_device
,
969 netvsc_free_send_slot(net_device
,
970 msd_send
->send_buf_index
);
971 dev_kfree_skb_any(msd_skb
);
977 ret
= netvsc_send_pkt(device
, cur_send
, net_device
, pb
, skb
);
979 if (ret
!= 0 && section_index
!= NETVSC_INVALID_INDEX
)
980 netvsc_free_send_slot(net_device
, section_index
);
985 static int netvsc_send_recv_completion(struct vmbus_channel
*channel
,
986 u64 transaction_id
, u32 status
)
988 struct nvsp_message recvcompMessage
;
991 recvcompMessage
.hdr
.msg_type
=
992 NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE
;
994 recvcompMessage
.msg
.v1_msg
.send_rndis_pkt_complete
.status
= status
;
996 /* Send the completion */
997 ret
= vmbus_sendpacket(channel
, &recvcompMessage
,
998 sizeof(struct nvsp_message_header
) + sizeof(u32
),
999 transaction_id
, VM_PKT_COMP
, 0);
1004 static inline void count_recv_comp_slot(struct netvsc_device
*nvdev
, u16 q_idx
,
1005 u32
*filled
, u32
*avail
)
1007 u32 first
= nvdev
->mrc
[q_idx
].first
;
1008 u32 next
= nvdev
->mrc
[q_idx
].next
;
1010 *filled
= (first
> next
) ? NETVSC_RECVSLOT_MAX
- first
+ next
:
1013 *avail
= NETVSC_RECVSLOT_MAX
- *filled
- 1;
1016 /* Read the first filled slot, no change to index */
1017 static inline struct recv_comp_data
*read_recv_comp_slot(struct netvsc_device
1022 if (!nvdev
->mrc
[q_idx
].buf
)
1025 count_recv_comp_slot(nvdev
, q_idx
, &filled
, &avail
);
1029 return nvdev
->mrc
[q_idx
].buf
+ nvdev
->mrc
[q_idx
].first
*
1030 sizeof(struct recv_comp_data
);
1033 /* Put the first filled slot back to available pool */
1034 static inline void put_recv_comp_slot(struct netvsc_device
*nvdev
, u16 q_idx
)
1038 nvdev
->mrc
[q_idx
].first
= (nvdev
->mrc
[q_idx
].first
+ 1) %
1039 NETVSC_RECVSLOT_MAX
;
1041 num_recv
= atomic_dec_return(&nvdev
->num_outstanding_recvs
);
1043 if (nvdev
->destroy
&& num_recv
== 0)
1044 wake_up(&nvdev
->wait_drain
);
1047 /* Check and send pending recv completions */
1048 static void netvsc_chk_recv_comp(struct netvsc_device
*nvdev
,
1049 struct vmbus_channel
*channel
, u16 q_idx
)
1051 struct recv_comp_data
*rcd
;
1055 rcd
= read_recv_comp_slot(nvdev
, q_idx
);
1059 ret
= netvsc_send_recv_completion(channel
, rcd
->tid
,
1064 put_recv_comp_slot(nvdev
, q_idx
);
1068 #define NETVSC_RCD_WATERMARK 80
1070 /* Get next available slot */
1071 static inline struct recv_comp_data
*get_recv_comp_slot(
1072 struct netvsc_device
*nvdev
, struct vmbus_channel
*channel
, u16 q_idx
)
1074 u32 filled
, avail
, next
;
1075 struct recv_comp_data
*rcd
;
1077 if (!nvdev
->recv_section
)
1080 if (!nvdev
->mrc
[q_idx
].buf
)
1083 if (atomic_read(&nvdev
->num_outstanding_recvs
) >
1084 nvdev
->recv_section
->num_sub_allocs
* NETVSC_RCD_WATERMARK
/ 100)
1085 netvsc_chk_recv_comp(nvdev
, channel
, q_idx
);
1087 count_recv_comp_slot(nvdev
, q_idx
, &filled
, &avail
);
1091 next
= nvdev
->mrc
[q_idx
].next
;
1092 rcd
= nvdev
->mrc
[q_idx
].buf
+ next
* sizeof(struct recv_comp_data
);
1093 nvdev
->mrc
[q_idx
].next
= (next
+ 1) % NETVSC_RECVSLOT_MAX
;
1095 atomic_inc(&nvdev
->num_outstanding_recvs
);
1100 static void netvsc_receive(struct netvsc_device
*net_device
,
1101 struct vmbus_channel
*channel
,
1102 struct hv_device
*device
,
1103 struct vmpacket_descriptor
*packet
)
1105 struct vmtransfer_page_packet_header
*vmxferpage_packet
;
1106 struct nvsp_message
*nvsp_packet
;
1107 struct hv_netvsc_packet nv_pkt
;
1108 struct hv_netvsc_packet
*netvsc_packet
= &nv_pkt
;
1109 u32 status
= NVSP_STAT_SUCCESS
;
1112 struct net_device
*ndev
= hv_get_drvdata(device
);
1115 struct recv_comp_data
*rcd
;
1116 u16 q_idx
= channel
->offermsg
.offer
.sub_channel_index
;
1119 * All inbound packets other than send completion should be xfer page
1122 if (packet
->type
!= VM_PKT_DATA_USING_XFER_PAGES
) {
1123 netdev_err(ndev
, "Unknown packet type received - %d\n",
1128 nvsp_packet
= (struct nvsp_message
*)((unsigned long)packet
+
1129 (packet
->offset8
<< 3));
1131 /* Make sure this is a valid nvsp packet */
1132 if (nvsp_packet
->hdr
.msg_type
!=
1133 NVSP_MSG1_TYPE_SEND_RNDIS_PKT
) {
1134 netdev_err(ndev
, "Unknown nvsp packet type received-"
1135 " %d\n", nvsp_packet
->hdr
.msg_type
);
1139 vmxferpage_packet
= (struct vmtransfer_page_packet_header
*)packet
;
1141 if (vmxferpage_packet
->xfer_pageset_id
!= NETVSC_RECEIVE_BUFFER_ID
) {
1142 netdev_err(ndev
, "Invalid xfer page set id - "
1143 "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID
,
1144 vmxferpage_packet
->xfer_pageset_id
);
1148 count
= vmxferpage_packet
->range_cnt
;
1150 /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
1151 for (i
= 0; i
< count
; i
++) {
1152 /* Initialize the netvsc packet */
1153 data
= (void *)((unsigned long)net_device
->
1154 recv_buf
+ vmxferpage_packet
->ranges
[i
].byte_offset
);
1155 netvsc_packet
->total_data_buflen
=
1156 vmxferpage_packet
->ranges
[i
].byte_count
;
1158 /* Pass it to the upper layer */
1159 status
= rndis_filter_receive(device
, netvsc_packet
, &data
,
1163 if (!net_device
->mrc
[q_idx
].buf
) {
1164 ret
= netvsc_send_recv_completion(channel
,
1165 vmxferpage_packet
->d
.trans_id
,
1168 netdev_err(ndev
, "Recv_comp q:%hd, tid:%llx, err:%d\n",
1169 q_idx
, vmxferpage_packet
->d
.trans_id
, ret
);
1173 rcd
= get_recv_comp_slot(net_device
, channel
, q_idx
);
1176 netdev_err(ndev
, "Recv_comp full buf q:%hd, tid:%llx\n",
1177 q_idx
, vmxferpage_packet
->d
.trans_id
);
1181 rcd
->tid
= vmxferpage_packet
->d
.trans_id
;
1182 rcd
->status
= status
;
1185 static void netvsc_send_table(struct hv_device
*hdev
,
1186 struct nvsp_message
*nvmsg
)
1188 struct netvsc_device
*nvscdev
;
1189 struct net_device
*ndev
= hv_get_drvdata(hdev
);
1193 nvscdev
= get_outbound_net_device(hdev
);
1197 count
= nvmsg
->msg
.v5_msg
.send_table
.count
;
1198 if (count
!= VRSS_SEND_TAB_SIZE
) {
1199 netdev_err(ndev
, "Received wrong send-table size:%u\n", count
);
1203 tab
= (u32
*)((unsigned long)&nvmsg
->msg
.v5_msg
.send_table
+
1204 nvmsg
->msg
.v5_msg
.send_table
.offset
);
1206 for (i
= 0; i
< count
; i
++)
1207 nvscdev
->send_table
[i
] = tab
[i
];
1210 static void netvsc_send_vf(struct net_device_context
*net_device_ctx
,
1211 struct nvsp_message
*nvmsg
)
1213 net_device_ctx
->vf_alloc
= nvmsg
->msg
.v4_msg
.vf_assoc
.allocated
;
1214 net_device_ctx
->vf_serial
= nvmsg
->msg
.v4_msg
.vf_assoc
.serial
;
1217 static inline void netvsc_receive_inband(struct hv_device
*hdev
,
1218 struct net_device_context
*net_device_ctx
,
1219 struct nvsp_message
*nvmsg
)
1221 switch (nvmsg
->hdr
.msg_type
) {
1222 case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE
:
1223 netvsc_send_table(hdev
, nvmsg
);
1226 case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION
:
1227 netvsc_send_vf(net_device_ctx
, nvmsg
);
1232 static void netvsc_process_raw_pkt(struct hv_device
*device
,
1233 struct vmbus_channel
*channel
,
1234 struct netvsc_device
*net_device
,
1235 struct net_device
*ndev
,
1237 struct vmpacket_descriptor
*desc
)
1239 struct nvsp_message
*nvmsg
;
1240 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
1242 nvmsg
= (struct nvsp_message
*)((unsigned long)
1243 desc
+ (desc
->offset8
<< 3));
1245 switch (desc
->type
) {
1247 netvsc_send_completion(net_device
, channel
, device
, desc
);
1250 case VM_PKT_DATA_USING_XFER_PAGES
:
1251 netvsc_receive(net_device
, channel
, device
, desc
);
1254 case VM_PKT_DATA_INBAND
:
1255 netvsc_receive_inband(device
, net_device_ctx
, nvmsg
);
1259 netdev_err(ndev
, "unhandled packet type %d, tid %llx\n",
1260 desc
->type
, request_id
);
1265 void netvsc_channel_cb(void *context
)
1268 struct vmbus_channel
*channel
= (struct vmbus_channel
*)context
;
1269 u16 q_idx
= channel
->offermsg
.offer
.sub_channel_index
;
1270 struct hv_device
*device
;
1271 struct netvsc_device
*net_device
;
1274 struct vmpacket_descriptor
*desc
;
1275 unsigned char *buffer
;
1276 int bufferlen
= NETVSC_PACKET_SIZE
;
1277 struct net_device
*ndev
;
1278 bool need_to_commit
= false;
1280 if (channel
->primary_channel
!= NULL
)
1281 device
= channel
->primary_channel
->device_obj
;
1283 device
= channel
->device_obj
;
1285 net_device
= get_inbound_net_device(device
);
1288 ndev
= hv_get_drvdata(device
);
1289 buffer
= get_per_channel_state(channel
);
1292 desc
= get_next_pkt_raw(channel
);
1294 netvsc_process_raw_pkt(device
,
1301 put_pkt_raw(channel
, desc
);
1302 need_to_commit
= true;
1305 if (need_to_commit
) {
1306 need_to_commit
= false;
1307 commit_rd_index(channel
);
1310 ret
= vmbus_recvpacket_raw(channel
, buffer
, bufferlen
,
1311 &bytes_recvd
, &request_id
);
1313 if (bytes_recvd
> 0) {
1314 desc
= (struct vmpacket_descriptor
*)buffer
;
1315 netvsc_process_raw_pkt(device
,
1323 * We are done for this pass.
1328 } else if (ret
== -ENOBUFS
) {
1329 if (bufferlen
> NETVSC_PACKET_SIZE
)
1331 /* Handle large packet */
1332 buffer
= kmalloc(bytes_recvd
, GFP_ATOMIC
);
1333 if (buffer
== NULL
) {
1334 /* Try again next time around */
1336 "unable to allocate buffer of size "
1337 "(%d)!!\n", bytes_recvd
);
1341 bufferlen
= bytes_recvd
;
1345 if (bufferlen
> NETVSC_PACKET_SIZE
)
1348 netvsc_chk_recv_comp(net_device
, channel
, q_idx
);
1352 * netvsc_device_add - Callback when the device belonging to this
1355 int netvsc_device_add(struct hv_device
*device
, void *additional_info
)
1359 ((struct netvsc_device_info
*)additional_info
)->ring_size
;
1360 struct netvsc_device
*net_device
;
1361 struct net_device
*ndev
= hv_get_drvdata(device
);
1362 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
1364 net_device
= alloc_net_device();
1368 net_device
->ring_size
= ring_size
;
1370 set_per_channel_state(device
->channel
, net_device
->cb_buffer
);
1372 /* Open the channel */
1373 ret
= vmbus_open(device
->channel
, ring_size
* PAGE_SIZE
,
1374 ring_size
* PAGE_SIZE
, NULL
, 0,
1375 netvsc_channel_cb
, device
->channel
);
1378 netdev_err(ndev
, "unable to open channel: %d\n", ret
);
1382 /* Channel is opened */
1383 pr_info("hv_netvsc channel opened successfully\n");
1385 /* If we're reopening the device we may have multiple queues, fill the
1386 * chn_table with the default channel to use it before subchannels are
1389 for (i
= 0; i
< VRSS_CHANNEL_MAX
; i
++)
1390 net_device
->chn_table
[i
] = device
->channel
;
1392 /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
1397 net_device_ctx
->nvdev
= net_device
;
1399 /* Connect with the NetVsp */
1400 ret
= netvsc_connect_vsp(device
);
1403 "unable to connect to NetVSP - %d\n", ret
);
1410 /* Now, we can close the channel safely */
1411 vmbus_close(device
->channel
);
1414 free_netvsc_device(net_device
);