/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
#include <linux/reciprocal_div.h>

#include <asm/sync_bitops.h>

#include "hyperv_net.h"
#include "netvsc_trace.h"

/*
 * Switch the data path from the synthetic interface to the VF
 * interface.
 */
void netvsc_switch_datapath(struct net_device *ndev, bool vf)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct hv_device *dev = net_device_ctx->device_ctx;
	struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
	struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;

	memset(init_pkt, 0, sizeof(struct nvsp_message));
	init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
	if (vf)
		init_pkt->msg.v4_msg.active_dp.active_datapath =
			NVSP_DATAPATH_VF;
	else
		init_pkt->msg.v4_msg.active_dp.active_datapath =
			NVSP_DATAPATH_SYNTHETIC;

	trace_nvsp_send(ndev, init_pkt);
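
	/* The switch message is fire-and-forget: it is sent as an inband
	 * packet without requesting a completion, so the return value is
	 * not checked and no response from the host is awaited.
	 */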
	vmbus_sendpacket(dev->channel, init_pkt,
			 sizeof(struct nvsp_message),
			 (unsigned long)init_pkt,
			 VM_PKT_DATA_INBAND, 0);
}

static struct netvsc_device *alloc_net_device(void)
{
	struct netvsc_device *net_device;

	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
	if (!net_device)
		return NULL;

	init_waitqueue_head(&net_device->wait_drain);
	net_device->destroy = false;

	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;

	init_completion(&net_device->channel_init_wait);
	init_waitqueue_head(&net_device->subchan_open);
	INIT_WORK(&net_device->subchan_work, rndis_set_subchannel);

	return net_device;
}

static void free_netvsc_device(struct rcu_head *head)
{
	struct netvsc_device *nvdev
		= container_of(head, struct netvsc_device, rcu);
	int i;

	kfree(nvdev->extension);
	vfree(nvdev->recv_buf);
	vfree(nvdev->send_buf);
	kfree(nvdev->send_section_map);

	for (i = 0; i < VRSS_CHANNEL_MAX; i++)
		vfree(nvdev->chan_table[i].mrc.slots);

	kfree(nvdev);
}
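
/* The data path looks up the netvsc_device under RCU (see netvsc_send),
 * so teardown defers the actual free through call_rcu() until every
 * reader that might still hold the stale pointer has finished.
 */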
static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
{
	call_rcu(&nvdev->rcu, free_netvsc_device);
}

static void netvsc_revoke_buf(struct hv_device *device,
			      struct netvsc_device *net_device)
{
	struct nvsp_message *revoke_packet;
	struct net_device *ndev = hv_get_drvdata(device);
	int ret;

	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->recv_section_cnt) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
		revoke_packet->msg.v1_msg.revoke_recv_buf.id =
			NETVSC_RECEIVE_BUFFER_ID;

		trace_nvsp_send(ndev, revoke_packet);

		ret = vmbus_sendpacket(device->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/* If the failure is because the channel is rescinded;
		 * ignore the failure since we cannot send on a rescinded
		 * channel. This would allow us to properly cleanup
		 * even when the channel is rescinded.
		 */
		if (device->channel->rescind)
			ret = 0;
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send "
				   "revoke receive buffer to netvsp\n");
			return;
		}
		net_device->recv_section_cnt = 0;
	}

	/* Deal with the send buffer we may have setup.
	 * If we got a send section size, it means we received a
	 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
	 * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->send_section_cnt) {
		/* Send the revoke send buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
		revoke_packet->msg.v1_msg.revoke_send_buf.id =
			NETVSC_SEND_BUFFER_ID;

		trace_nvsp_send(ndev, revoke_packet);

		ret = vmbus_sendpacket(device->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);

		/* If the failure is because the channel is rescinded;
		 * ignore the failure since we cannot send on a rescinded
		 * channel. This would allow us to properly cleanup
		 * even when the channel is rescinded.
		 */
		if (device->channel->rescind)
			ret = 0;

		/* If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send "
				   "revoke send buffer to netvsp\n");
			return;
		}
		net_device->send_section_cnt = 0;
	}
}

static void netvsc_teardown_gpadl(struct hv_device *device,
				  struct netvsc_device *net_device)
{
	struct net_device *ndev = hv_get_drvdata(device);
	int ret;

	if (net_device->recv_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(device->channel,
					   net_device->recv_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown receive buffer's gpadl\n");
			return;
		}
		net_device->recv_buf_gpadl_handle = 0;
	}

	if (net_device->send_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(device->channel,
					   net_device->send_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown send buffer's gpadl\n");
			return;
		}
		net_device->send_buf_gpadl_handle = 0;
	}
}
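
/* Allocate the receive completion ring for one channel, preferring
 * memory local to the NUMA node of the channel's target CPU and
 * falling back to any node if the node-local allocation fails.
 */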
int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
{
	struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
	int node = cpu_to_node(nvchan->channel->target_cpu);
	size_t size;

	size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
	nvchan->mrc.slots = vzalloc_node(size, node);
	if (!nvchan->mrc.slots)
		nvchan->mrc.slots = vzalloc(size);

	return nvchan->mrc.slots ? 0 : -ENOMEM;
}

static int netvsc_init_buf(struct hv_device *device,
			   struct netvsc_device *net_device,
			   const struct netvsc_device_info *device_info)
{
	struct nvsp_1_message_send_receive_buffer_complete *resp;
	struct net_device *ndev = hv_get_drvdata(device);
	struct nvsp_message *init_packet;
	unsigned int buf_size;
	size_t map_words;
	int ret = 0;

	/* Get receive buffer area. */
	buf_size = device_info->recv_sections * device_info->recv_section_size;
	buf_size = roundup(buf_size, PAGE_SIZE);

	/* Legacy hosts only allow smaller receive buffer */
	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
		buf_size = min_t(unsigned int, buf_size,
				 NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);

	net_device->recv_buf = vzalloc(buf_size);
	if (!net_device->recv_buf) {
		netdev_err(ndev,
			   "unable to allocate receive buffer of size %u\n",
			   buf_size);
		ret = -ENOMEM;
		goto cleanup;
	}

	net_device->recv_buf_size = buf_size;

	/*
	 * Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
				    buf_size,
				    &net_device->recv_buf_gpadl_handle);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to establish receive buffer's gpadl\n");
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */
	init_packet = &net_device->channel_init_pkt;
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
	init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
		net_device->recv_buf_gpadl_handle;
	init_packet->msg.v1_msg.send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

	trace_nvsp_send(ndev, init_packet);

	/* Send the gpadl notification request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to send receive buffer's gpadl to netvsp\n");
		goto cleanup;
	}

	wait_for_completion(&net_device->channel_init_wait);

	/* Check the response */
	resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
	if (resp->status != NVSP_STAT_SUCCESS) {
		netdev_err(ndev,
			   "Unable to complete receive buffer initialization with NetVsp - status %d\n",
			   resp->status);
		ret = -EINVAL;
		goto cleanup;
	}

	/* Parse the response */
	netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
		   resp->num_sections, resp->sections[0].sub_alloc_size,
		   resp->sections[0].num_sub_allocs);

	/* There should only be one section for the entire receive buffer */
	if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
		ret = -EINVAL;
		goto cleanup;
	}

	net_device->recv_section_size = resp->sections[0].sub_alloc_size;
	net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
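
	/* One extra slot is reserved so a full completion ring can be
	 * told apart from an empty one (first == next means empty; see
	 * recv_comp_slot_avail()), and the count is rounded up to a
	 * multiple of PAGE_SIZE / sizeof(u64) entries.
	 */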
	/* Setup receive completion ring */
	net_device->recv_completion_cnt
		= round_up(net_device->recv_section_cnt + 1,
			   PAGE_SIZE / sizeof(u64));
	ret = netvsc_alloc_recv_comp_ring(net_device, 0);
	if (ret)
		goto cleanup;

	/* Now setup the send buffer. */
	buf_size = device_info->send_sections * device_info->send_section_size;
	buf_size = round_up(buf_size, PAGE_SIZE);

	net_device->send_buf = vzalloc(buf_size);
	if (!net_device->send_buf) {
		netdev_err(ndev, "unable to allocate send buffer of size %u\n",
			   buf_size);
		ret = -ENOMEM;
		goto cleanup;
	}

	/* Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
	ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
				    buf_size,
				    &net_device->send_buf_gpadl_handle);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to establish send buffer's gpadl\n");
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */
	init_packet = &net_device->channel_init_pkt;
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
	init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
		net_device->send_buf_gpadl_handle;
	init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;

	trace_nvsp_send(ndev, init_packet);

	/* Send the gpadl notification request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to send send buffer's gpadl to netvsp\n");
		goto cleanup;
	}

	wait_for_completion(&net_device->channel_init_wait);

	/* Check the response */
	if (init_packet->msg.v1_msg.send_send_buf_complete.status !=
	    NVSP_STAT_SUCCESS) {
		netdev_err(ndev, "Unable to complete send buffer "
			   "initialization with NetVsp - status %d\n",
			   init_packet->msg.v1_msg.send_send_buf_complete.status);
		ret = -EINVAL;
		goto cleanup;
	}

	/* Parse the response */
	net_device->send_section_size =
		init_packet->msg.v1_msg.send_send_buf_complete.section_size;

	/* Section count is simply the size divided by the section size. */
	net_device->send_section_cnt = buf_size / net_device->send_section_size;

	netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
		   net_device->send_section_size, net_device->send_section_cnt);

	/* Setup state for managing the send buffer. */
	map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);

	net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
	if (net_device->send_section_map == NULL) {
		ret = -ENOMEM;
		goto cleanup;
	}

	goto exit;

cleanup:
	netvsc_revoke_buf(device, net_device);
	netvsc_teardown_gpadl(device, net_device);

exit:
	return ret;
}

/* Negotiate NVSP protocol version */
static int negotiate_nvsp_ver(struct hv_device *device,
			      struct netvsc_device *net_device,
			      struct nvsp_message *init_packet,
			      u32 nvsp_ver)
{
	struct net_device *ndev = hv_get_drvdata(device);
	int ret;

	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;

	trace_nvsp_send(ndev, init_packet);

	/* Send the init request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0)
		return ret;

	wait_for_completion(&net_device->channel_init_wait);

	if (init_packet->msg.init_msg.init_complete.status !=
	    NVSP_STAT_SUCCESS)
		return -EINVAL;

	if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
		return 0;

	/* NVSPv2 or later: Send NDIS config */
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
	init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

	if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
		init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;

		/* Teaming bit is needed to receive link speed updates */
		init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
	}

	trace_nvsp_send(ndev, init_packet);

	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND, 0);

	return ret;
}
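
/* Connect to the host's NetVSP: walk ver_list from the newest protocol
 * version down and settle on the first one the host accepts, then tell
 * the host which NDIS version the guest will speak.
 */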
static int netvsc_connect_vsp(struct hv_device *device,
			      struct netvsc_device *net_device,
			      const struct netvsc_device_info *device_info)
{
	struct net_device *ndev = hv_get_drvdata(device);
	static const u32 ver_list[] = {
		NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
	};
	struct nvsp_message *init_packet;
	int ndis_version, i, ret;

	init_packet = &net_device->channel_init_pkt;

	/* Negotiate the latest NVSP protocol supported */
	for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
		if (negotiate_nvsp_ver(device, net_device, init_packet,
				       ver_list[i]) == 0) {
			net_device->nvsp_version = ver_list[i];
			break;
		}

	if (i < 0) {
		ret = -EPROTO;
		goto cleanup;
	}

	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

	/* Send the ndis version */
	memset(init_packet, 0, sizeof(struct nvsp_message));

	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
		ndis_version = 0x00060001;
	else
		ndis_version = 0x0006001e;

	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
	init_packet->msg.v1_msg.send_ndis_ver.ndis_major_ver =
		(ndis_version & 0xFFFF0000) >> 16;
	init_packet->msg.v1_msg.send_ndis_ver.ndis_minor_ver =
		ndis_version & 0xFFFF;

	trace_nvsp_send(ndev, init_packet);

	/* Send the init request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND, 0);
	if (ret != 0)
		goto cleanup;

	ret = netvsc_init_buf(device, net_device, device_info);

cleanup:
	return ret;
}

/*
 * netvsc_device_remove - Callback when the root bus device is removed
 */
void netvsc_device_remove(struct hv_device *device)
{
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct netvsc_device *net_device
		= rtnl_dereference(net_device_ctx->nvdev);
	int i;

	netvsc_revoke_buf(device, net_device);

	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);

	/* And disassociate NAPI context from device */
	for (i = 0; i < net_device->num_chn; i++)
		netif_napi_del(&net_device->chan_table[i].napi);

	/*
	 * At this point, no one should be accessing net_device
	 * except in here
	 */
	netdev_dbg(ndev, "net device safe to remove\n");

	/* older versions require that buffer be revoked before close */
	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
		netvsc_teardown_gpadl(device, net_device);

	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

	if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
		netvsc_teardown_gpadl(device, net_device);

	/* Release all resources */
	free_netvsc_device_rcu(net_device);
}

#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10
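
/* Transmit flow control uses hysteresis: a queue is stopped when free
 * ring space falls below RING_AVAIL_PERCENT_LOWATER (10%) and is only
 * woken once it rises above RING_AVAIL_PERCENT_HIWATER (20%), so the
 * queue does not flap around a single threshold.
 */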

/*
 * Get the percentage of available bytes to write in the ring.
 * The return value is in range from 0 to 100.
 */
static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info *ring_info)
{
	u32 avail_write = hv_get_bytes_to_write(ring_info);

	return reciprocal_divide(avail_write * 100, netvsc_ring_reciprocal);
}

static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
					 u32 index)
{
	sync_change_bit(index, net_device->send_section_map);
}

static void netvsc_send_tx_complete(struct netvsc_device *net_device,
				    struct vmbus_channel *incoming_channel,
				    struct hv_device *device,
				    const struct vmpacket_descriptor *desc,
				    int budget)
{
	struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
	struct vmbus_channel *channel = device->channel;
	u16 q_idx = 0;
	int queue_sends;

	/* Notify the layer above us */
	if (likely(skb)) {
		const struct hv_netvsc_packet *packet
			= (struct hv_netvsc_packet *)skb->cb;
		u32 send_index = packet->send_buf_index;
		struct netvsc_stats *tx_stats;

		if (send_index != NETVSC_INVALID_INDEX)
			netvsc_free_send_slot(net_device, send_index);
		q_idx = packet->q_idx;
		channel = incoming_channel;

		tx_stats = &net_device->chan_table[q_idx].tx_stats;

		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->packets += packet->total_packets;
		tx_stats->bytes += packet->total_bytes;
		u64_stats_update_end(&tx_stats->syncp);

		napi_consume_skb(skb, budget);
	}

	queue_sends =
		atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);

	if (unlikely(net_device->destroy)) {
		if (queue_sends == 0)
			wake_up(&net_device->wait_drain);
	} else {
		struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);

		if (netif_tx_queue_stopped(txq) &&
		    (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
		     queue_sends < 1)) {
			netif_tx_wake_queue(txq);
			ndev_ctx->eth_stats.wake_queue++;
		}
	}
}

static void netvsc_send_completion(struct netvsc_device *net_device,
				   struct vmbus_channel *incoming_channel,
				   struct hv_device *device,
				   const struct vmpacket_descriptor *desc,
				   int budget)
{
	struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
	struct net_device *ndev = hv_get_drvdata(device);

	switch (nvsp_packet->hdr.msg_type) {
	case NVSP_MSG_TYPE_INIT_COMPLETE:
	case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
	case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
	case NVSP_MSG5_TYPE_SUBCHANNEL:
		/* Copy the response back */
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
		       sizeof(struct nvsp_message));
		complete(&net_device->channel_init_wait);
		break;

	case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
		netvsc_send_tx_complete(net_device, incoming_channel,
					device, desc, budget);
		break;

	default:
		netdev_err(ndev,
			   "Unknown send completion type %d received!!\n",
			   nvsp_packet->hdr.msg_type);
	}
}
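
/* Claim a free send-buffer section by atomically setting its bit in
 * send_section_map. sync_test_and_set_bit() makes this safe against
 * concurrent senders and the completion path without taking a lock.
 */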
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
	unsigned long *map_addr = net_device->send_section_map;
	unsigned int i;

	for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
		if (sync_test_and_set_bit(i, map_addr) == 0)
			return i;
	}

	return NETVSC_INVALID_INDEX;
}

static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
				    unsigned int section_index,
				    u32 pend_size,
				    struct hv_netvsc_packet *packet,
				    struct rndis_message *rndis_msg,
				    struct hv_page_buffer *pb,
				    bool xmit_more)
{
	char *start = net_device->send_buf;
	char *dest = start + (section_index * net_device->send_section_size)
		     + pend_size;
	int i;
	u32 padding = 0;
	u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
		packet->page_buf_cnt;
	u32 remain;

	/* Add padding */
	remain = packet->total_data_buflen & (net_device->pkt_align - 1);
	if (xmit_more && remain) {
		padding = net_device->pkt_align - remain;
		rndis_msg->msg_len += padding;
		packet->total_data_buflen += padding;
	}
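
	/* Example: with the default 8-byte alignment, a packet whose
	 * total_data_buflen is 1514 leaves remain = 1514 & 7 = 2, so
	 * 6 bytes of zero padding are added when more packets are
	 * expected (xmit_more).
	 */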

	for (i = 0; i < page_count; i++) {
		char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
		u32 offset = pb[i].offset;
		u32 len = pb[i].len;

		memcpy(dest, (src + offset), len);
		dest += len;
	}

	if (padding)
		memset(dest, 0, padding);
}

static inline int netvsc_send_pkt(
	struct hv_device *device,
	struct hv_netvsc_packet *packet,
	struct netvsc_device *net_device,
	struct hv_page_buffer *pb,
	struct sk_buff *skb)
{
	struct nvsp_message nvmsg;
	struct nvsp_1_message_send_rndis_packet *rpkt =
		&nvmsg.msg.v1_msg.send_rndis_pkt;
	struct netvsc_channel * const nvchan =
		&net_device->chan_table[packet->q_idx];
	struct vmbus_channel *out_channel = nvchan->channel;
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
	struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
	u64 req_id;
	int ret;
	u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);

	nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
	if (skb)
		rpkt->channel_type = 0;		/* 0 is RMC_DATA */
	else
		rpkt->channel_type = 1;		/* 1 is RMC_CONTROL */

	rpkt->send_buf_section_index = packet->send_buf_index;
	if (packet->send_buf_index == NETVSC_INVALID_INDEX)
		rpkt->send_buf_section_size = 0;
	else
		rpkt->send_buf_section_size = packet->total_data_buflen;

	req_id = (ulong)skb;

	if (out_channel->rescind)
		return -ENODEV;

	trace_nvsp_send_pkt(ndev, out_channel, rpkt);

	if (packet->page_buf_cnt) {
		if (packet->cp_partial)
			pb += packet->rmsg_pgcnt;

		ret = vmbus_sendpacket_pagebuffer(out_channel,
						  pb, packet->page_buf_cnt,
						  &nvmsg, sizeof(nvmsg),
						  req_id);
	} else {
		ret = vmbus_sendpacket(out_channel,
				       &nvmsg, sizeof(nvmsg),
				       req_id, VM_PKT_DATA_INBAND,
				       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	}

	if (ret == 0) {
		atomic_inc_return(&nvchan->queue_sends);

		if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
			netif_tx_stop_queue(txq);
			ndev_ctx->eth_stats.stop_queue++;
		}
	} else if (ret == -EAGAIN) {
		netif_tx_stop_queue(txq);
		ndev_ctx->eth_stats.stop_queue++;
		if (atomic_read(&nvchan->queue_sends) < 1) {
			netif_tx_wake_queue(txq);
			ndev_ctx->eth_stats.wake_queue++;
			ret = -ENOSPC;
		}
	} else {
		netdev_err(ndev,
			   "Unable to send packet pages %u len %u, ret %d\n",
			   packet->page_buf_cnt, packet->total_data_buflen,
			   ret);
	}

	return ret;
}

/* Move packet out of multi send data (msd), and clear msd */
static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
				struct sk_buff **msd_skb,
				struct multi_send_data *msdp)
{
	*msd_skb = msdp->skb;
	*msd_send = msdp->pkt;
	msdp->skb = NULL;
	msdp->pkt = NULL;
	msdp->count = 0;
}
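
/* Transmit path: small packets are aggregated into a shared send-buffer
 * section while the stack keeps signalling xmit_more, and flushed once
 * aggregation stops or the pending data no longer fits. This trades an
 * extra copy for fewer VMBus packets sent to the host.
 */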

/* RCU already held by caller */
int netvsc_send(struct net_device *ndev,
		struct hv_netvsc_packet *packet,
		struct rndis_message *rndis_msg,
		struct hv_page_buffer *pb,
		struct sk_buff *skb)
{
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
	struct netvsc_device *net_device
		= rcu_dereference_bh(ndev_ctx->nvdev);
	struct hv_device *device = ndev_ctx->device_ctx;
	int ret = 0;
	struct netvsc_channel *nvchan;
	u32 pktlen = packet->total_data_buflen, msd_len = 0;
	unsigned int section_index = NETVSC_INVALID_INDEX;
	struct multi_send_data *msdp;
	struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
	struct sk_buff *msd_skb = NULL;
	bool try_batch, xmit_more;

	/* If device is rescinded, return error and packet will get dropped. */
	if (unlikely(!net_device || net_device->destroy))
		return -ENODEV;

	nvchan = &net_device->chan_table[packet->q_idx];
	packet->send_buf_index = NETVSC_INVALID_INDEX;
	packet->cp_partial = false;

	/* Send control message directly without accessing msd (Multi-Send
	 * Data) field which may be changed during data packet processing.
	 */
	if (!skb)
		return netvsc_send_pkt(device, packet, net_device, pb, skb);

	/* batch packets in send buffer if possible */
	msdp = &nvchan->msd;
	if (msdp->pkt)
		msd_len = msdp->pkt->total_data_buflen;

	try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
	if (try_batch && msd_len + pktlen + net_device->pkt_align <
	    net_device->send_section_size) {
		section_index = msdp->pkt->send_buf_index;

	} else if (try_batch && msd_len + packet->rmsg_size <
		   net_device->send_section_size) {
		section_index = msdp->pkt->send_buf_index;
		packet->cp_partial = true;

	} else if (pktlen + net_device->pkt_align <
		   net_device->send_section_size) {
		section_index = netvsc_get_next_send_section(net_device);
		if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
			++ndev_ctx->eth_stats.tx_send_full;
		} else {
			move_pkt_msd(&msd_send, &msd_skb, msdp);
			msd_len = 0;
		}
	}

	/* Keep aggregating only if stack says more data is coming
	 * and not doing mixed modes send and not flow blocked
	 */
	xmit_more = skb->xmit_more &&
		!packet->cp_partial &&
		!netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));

	if (section_index != NETVSC_INVALID_INDEX) {
		netvsc_copy_to_send_buf(net_device,
					section_index, msd_len,
					packet, rndis_msg, pb, xmit_more);

		packet->send_buf_index = section_index;

		if (packet->cp_partial) {
			packet->page_buf_cnt -= packet->rmsg_pgcnt;
			packet->total_data_buflen = msd_len + packet->rmsg_size;
		} else {
			packet->page_buf_cnt = 0;
			packet->total_data_buflen += msd_len;
		}

		if (msdp->pkt) {
			packet->total_packets += msdp->pkt->total_packets;
			packet->total_bytes += msdp->pkt->total_bytes;
		}

		if (msdp->skb)
			dev_consume_skb_any(msdp->skb);

		if (xmit_more) {
			msdp->skb = skb;
			msdp->pkt = packet;
			msdp->count++;
		} else {
			cur_send = packet;
			msdp->skb = NULL;
			msdp->pkt = NULL;
			msdp->count = 0;
		}
	} else {
		move_pkt_msd(&msd_send, &msd_skb, msdp);
		cur_send = packet;
	}

	if (msd_send) {
		int m_ret = netvsc_send_pkt(device, msd_send, net_device,
					    NULL, msd_skb);

		if (m_ret != 0) {
			netvsc_free_send_slot(net_device,
					      msd_send->send_buf_index);
			dev_kfree_skb_any(msd_skb);
		}
	}

	if (cur_send)
		ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);

	if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
		netvsc_free_send_slot(net_device, section_index);

	return ret;
}

/* Send pending recv completions */
static int send_recv_completions(struct net_device *ndev,
				 struct netvsc_device *nvdev,
				 struct netvsc_channel *nvchan)
{
	struct multi_recv_comp *mrc = &nvchan->mrc;
	struct recv_comp_msg {
		struct nvsp_message_header hdr;
		u32 status;
	} __packed;
	struct recv_comp_msg msg = {
		.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
	};
	int ret;

	while (mrc->first != mrc->next) {
		const struct recv_comp_data *rcd
			= mrc->slots + mrc->first;

		msg.status = rcd->status;
		ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
				       rcd->tid, VM_PKT_COMP, 0);
		if (unlikely(ret)) {
			struct net_device_context *ndev_ctx = netdev_priv(ndev);

			++ndev_ctx->eth_stats.rx_comp_busy;
			return ret;
		}

		if (++mrc->first == nvdev->recv_completion_cnt)
			mrc->first = 0;
	}

	/* receive completion ring has been emptied */
	if (unlikely(nvdev->destroy))
		wake_up(&nvdev->wait_drain);

	return 0;
}
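
/* Example: with recv_completion_cnt == 1024, first == 1000 and
 * next == 10, (1024 - 1000) + 10 = 34 completions are outstanding and
 * 1024 - 34 - 1 = 989 slots remain free; one slot always stays unused
 * so that first == next unambiguously means an empty ring.
 */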
/* Count how many receive completions are outstanding */
static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
				 const struct multi_recv_comp *mrc,
				 u32 *filled, u32 *avail)
{
	u32 count = nvdev->recv_completion_cnt;

	if (mrc->next >= mrc->first)
		*filled = mrc->next - mrc->first;
	else
		*filled = (count - mrc->first) + mrc->next;

	*avail = count - *filled - 1;
}

/* Add receive complete to ring to send to host. */
static void enq_receive_complete(struct net_device *ndev,
				 struct netvsc_device *nvdev, u16 q_idx,
				 u64 tid, u32 status)
{
	struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
	struct multi_recv_comp *mrc = &nvchan->mrc;
	struct recv_comp_data *rcd;
	u32 filled, avail;

	recv_comp_slot_avail(nvdev, mrc, &filled, &avail);

	if (unlikely(filled > NAPI_POLL_WEIGHT)) {
		send_recv_completions(ndev, nvdev, nvchan);
		recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
	}

	if (unlikely(!avail)) {
		netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
			   q_idx, tid);
		return;
	}

	rcd = mrc->slots + mrc->next;
	rcd->tid = tid;
	rcd->status = status;

	if (++mrc->next == nvdev->recv_completion_cnt)
		mrc->next = 0;
}

static int netvsc_receive(struct net_device *ndev,
			  struct netvsc_device *net_device,
			  struct net_device_context *net_device_ctx,
			  struct hv_device *device,
			  struct vmbus_channel *channel,
			  const struct vmpacket_descriptor *desc,
			  struct nvsp_message *nvsp)
{
	const struct vmtransfer_page_packet_header *vmxferpage_packet
		= container_of(desc, const struct vmtransfer_page_packet_header, d);
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	char *recv_buf = net_device->recv_buf;
	u32 status = NVSP_STAT_SUCCESS;
	int i;
	int count = 0;

	/* Make sure this is a valid nvsp packet */
	if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
		netif_err(net_device_ctx, rx_err, ndev,
			  "Unknown nvsp packet type received %u\n",
			  nvsp->hdr.msg_type);
		return 0;
	}

	if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
		netif_err(net_device_ctx, rx_err, ndev,
			  "Invalid xfer page set id - expecting %x got %x\n",
			  NETVSC_RECEIVE_BUFFER_ID,
			  vmxferpage_packet->xfer_pageset_id);
		return 0;
	}

	count = vmxferpage_packet->range_cnt;

	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
	for (i = 0; i < count; i++) {
		u32 offset = vmxferpage_packet->ranges[i].byte_offset;
		u32 buflen = vmxferpage_packet->ranges[i].byte_count;
		void *data;
		int ret;

		if (unlikely(offset + buflen > net_device->recv_buf_size)) {
			status = NVSP_STAT_FAIL;
			netif_err(net_device_ctx, rx_err, ndev,
				  "Packet offset:%u + len:%u too big\n",
				  offset, buflen);
			continue;
		}

		data = recv_buf + offset;

		trace_rndis_recv(ndev, q_idx, data);

		/* Pass it to the upper layer */
		ret = rndis_filter_receive(ndev, net_device,
					   channel, data, buflen);

		if (unlikely(ret != NVSP_STAT_SUCCESS))
			status = NVSP_STAT_FAIL;
	}

	enq_receive_complete(ndev, net_device, q_idx,
			     vmxferpage_packet->d.trans_id, status);

	return count;
}
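
/* The host pushes an indirection table mapping hash values to send
 * queues; it is cached in tx_table here and consulted by the transmit
 * queue selection logic in netvsc_drv.c.
 */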
static void netvsc_send_table(struct hv_device *hdev,
			      struct nvsp_message *nvmsg)
{
	struct net_device *ndev = hv_get_drvdata(hdev);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	int i;
	u32 count, *tab;

	count = nvmsg->msg.v5_msg.send_table.count;
	if (count != VRSS_SEND_TAB_SIZE) {
		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
		return;
	}

	tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
		      nvmsg->msg.v5_msg.send_table.offset);

	for (i = 0; i < count; i++)
		net_device_ctx->tx_table[i] = tab[i];
}

static void netvsc_send_vf(struct net_device_context *net_device_ctx,
			   struct nvsp_message *nvmsg)
{
	net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
	net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
}

static inline void netvsc_receive_inband(struct hv_device *hdev,
					 struct net_device_context *net_device_ctx,
					 struct nvsp_message *nvmsg)
{
	switch (nvmsg->hdr.msg_type) {
	case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
		netvsc_send_table(hdev, nvmsg);
		break;

	case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
		netvsc_send_vf(net_device_ctx, nvmsg);
		break;
	}
}
*device
,
1176 struct vmbus_channel
*channel
,
1177 struct netvsc_device
*net_device
,
1178 struct net_device
*ndev
,
1179 const struct vmpacket_descriptor
*desc
,
1182 struct net_device_context
*net_device_ctx
= netdev_priv(ndev
);
1183 struct nvsp_message
*nvmsg
= hv_pkt_data(desc
);
1185 trace_nvsp_recv(ndev
, channel
, nvmsg
);
1187 switch (desc
->type
) {
1189 netvsc_send_completion(net_device
, channel
, device
,
1193 case VM_PKT_DATA_USING_XFER_PAGES
:
1194 return netvsc_receive(ndev
, net_device
, net_device_ctx
,
1195 device
, channel
, desc
, nvmsg
);
1198 case VM_PKT_DATA_INBAND
:
1199 netvsc_receive_inband(device
, net_device_ctx
, nvmsg
);
1203 netdev_err(ndev
, "unhandled packet type %d, tid %llx\n",
1204 desc
->type
, desc
->trans_id
);

static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
	struct vmbus_channel *primary = channel->primary_channel;

	return primary ? primary->device_obj : channel->device_obj;
}

/* Network processing softirq
 * Process data in incoming ring buffer from host
 * Stops when ring is empty or budget is met or exceeded.
 */
int netvsc_poll(struct napi_struct *napi, int budget)
{
	struct netvsc_channel *nvchan
		= container_of(napi, struct netvsc_channel, napi);
	struct netvsc_device *net_device = nvchan->net_device;
	struct vmbus_channel *channel = nvchan->channel;
	struct hv_device *device = netvsc_channel_to_device(channel);
	struct net_device *ndev = hv_get_drvdata(device);
	int work_done = 0;

	/* If starting a new interval */
	if (!nvchan->desc)
		nvchan->desc = hv_pkt_iter_first(channel);

	while (nvchan->desc && work_done < budget) {
		work_done += netvsc_process_raw_pkt(device, channel, net_device,
						    ndev, nvchan->desc, budget);
		nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
	}

	/* If send of pending receive completions succeeded
	 *   and did not exhaust NAPI budget this time
	 *   and not doing busy poll
	 * then re-enable host interrupts
	 *   and reschedule if ring is not empty.
	 */
	if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
	    work_done < budget &&
	    napi_complete_done(napi, work_done) &&
	    hv_end_read(&channel->inbound) &&
	    napi_schedule_prep(napi)) {
		hv_begin_read(&channel->inbound);
		__napi_schedule(napi);
	}

	/* Driver may overshoot since multiple packets per descriptor */
	return min(work_done, budget);
}

/* Call back when data is available in host ring buffer.
 * Processing is deferred until network softirq (NAPI)
 */
void netvsc_channel_cb(void *context)
{
	struct netvsc_channel *nvchan = context;
	struct vmbus_channel *channel = nvchan->channel;
	struct hv_ring_buffer_info *rbi = &channel->inbound;

	/* preload first vmpacket descriptor */
	prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);

	if (napi_schedule_prep(&nvchan->napi)) {
		/* disable interrupts from host */
		hv_begin_read(rbi);

		__napi_schedule_irqoff(&nvchan->napi);
	}
}

/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
struct netvsc_device *netvsc_device_add(struct hv_device *device,
				const struct netvsc_device_info *device_info)
{
	int i, ret = 0;
	struct netvsc_device *net_device;
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);

	net_device = alloc_net_device();
	if (!net_device)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
		net_device_ctx->tx_table[i] = 0;

	/* Because the device uses NAPI, all the interrupt batching and
	 * control is done via Net softirq, not the channel handling
	 */
	set_channel_read_mode(device->channel, HV_CALL_ISR);

	/* If we're reopening the device we may have multiple queues, fill the
	 * chn_table with the default channel to use it before subchannels are
	 * opened.
	 * Initialize the channel state before we open;
	 * we can be interrupted as soon as we open the channel.
	 */
	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
		struct netvsc_channel *nvchan = &net_device->chan_table[i];

		nvchan->channel = device->channel;
		nvchan->net_device = net_device;
		u64_stats_init(&nvchan->tx_stats.syncp);
		u64_stats_init(&nvchan->rx_stats.syncp);
	}

	/* Enable NAPI handler before init callbacks */
	netif_napi_add(ndev, &net_device->chan_table[0].napi,
		       netvsc_poll, NAPI_POLL_WEIGHT);

	/* Open the channel */
	ret = vmbus_open(device->channel, netvsc_ring_bytes,
			 netvsc_ring_bytes, NULL, 0,
			 netvsc_channel_cb, net_device->chan_table);
	if (ret != 0) {
		netdev_err(ndev, "unable to open channel: %d\n", ret);
		goto cleanup;
	}

	/* Channel is opened */
	netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");

	napi_enable(&net_device->chan_table[0].napi);

	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device, net_device, device_info);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to connect to NetVSP - %d\n", ret);
		goto close;
	}

	/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
	 * populated.
	 */
	rcu_assign_pointer(net_device_ctx->nvdev, net_device);

	return net_device;

close:
	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
	napi_disable(&net_device->chan_table[0].napi);

	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:
	netif_napi_del(&net_device->chan_table[0].napi);
	free_netvsc_device(&net_device->rcu);

	return ERR_PTR(ret);
}