2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
28 #include "PacketParser.h"
31 #pragma warning( push )
32 #pragma warning( disable:4127 )
38 #define OVS_DBG_MOD OVS_DBG_VXLAN
41 /* Helper macro to check if a VXLAN ID is valid. */
42 #define VXLAN_ID_IS_VALID(vxlanID) (0 < (vxlanID) && (vxlanID) <= 0xffffff)
43 #define VXLAN_TUNNELID_TO_VNI(_tID) (UINT32)(((UINT64)(_tID)) >> 40)
44 #define VXLAN_VNI_TO_TUNNELID(_vni) (((UINT64)(_vni)) << 40)
45 #define IP_DF_NBO 0x0040
46 #define VXLAN_DEFAULT_TTL 64
47 #define VXLAN_MULTICAST_TTL 64
48 #define VXLAN_DEFAULT_INSTANCE_ID 1
50 /* Move to a header file */
51 extern POVS_SWITCH_CONTEXT gOvsSwitchContext
;
54 OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport
,
55 POVS_VPORT_ADD_REQUEST addReq
)
57 POVS_VXLAN_VPORT vxlanPort
;
58 NTSTATUS status
= STATUS_SUCCESS
;
60 ASSERT(addReq
->type
== OVS_VPORT_TYPE_VXLAN
);
62 vxlanPort
= OvsAllocateMemory(sizeof (*vxlanPort
));
63 if (vxlanPort
== NULL
) {
64 status
= STATUS_INSUFFICIENT_RESOURCES
;
66 RtlZeroMemory(vxlanPort
, sizeof (*vxlanPort
));
67 vxlanPort
->dstPort
= addReq
->dstPort
;
69 * since we are installing the WFP filter before the port is created
70 * We need to check if it is the same number
71 * XXX should be removed later
73 ASSERT(vxlanPort
->dstPort
== VXLAN_UDP_PORT
);
74 vport
->priv
= (PVOID
)vxlanPort
;
81 OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport
)
83 if (vport
->ovsType
!= OVS_VPORT_TYPE_VXLAN
||
84 vport
->priv
== NULL
) {
88 OvsFreeMemory(vport
->priv
);
94 *----------------------------------------------------------------------------
96 * Encapsulates the packet.
97 *----------------------------------------------------------------------------
99 static __inline NDIS_STATUS
100 OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl
,
101 OvsIPv4TunnelKey
*tunKey
,
102 POVS_FWD_INFO fwdInfo
,
103 POVS_PACKET_HDR_INFO layers
,
104 POVS_SWITCH_CONTEXT switchContext
,
105 PNET_BUFFER_LIST
*newNbl
)
115 UINT32 headRoom
= OvsGetVxlanTunHdrSize();
119 * XXX: the assumption currently is that the NBL is owned by OVS, and
120 * headroom has already been allocated as part of allocating the NBL and
123 curNb
= NET_BUFFER_LIST_FIRST_NB(curNbl
);
124 packetLength
= NET_BUFFER_DATA_LENGTH(curNb
);
126 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo
;
128 tsoInfo
.Value
= NET_BUFFER_LIST_INFO(curNbl
,
129 TcpLargeSendNetBufferListInfo
);
130 OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo
.LsoV1Transmit
.MSS
, packetLength
);
131 if (tsoInfo
.LsoV1Transmit
.MSS
) {
132 OVS_LOG_TRACE("l4Offset %d", layers
->l4Offset
);
133 *newNbl
= OvsTcpSegmentNBL(switchContext
, curNbl
, layers
,
134 tsoInfo
.LsoV1Transmit
.MSS
, headRoom
);
135 if (*newNbl
== NULL
) {
136 OVS_LOG_ERROR("Unable to segment NBL");
137 return NDIS_STATUS_FAILURE
;
141 /* If we didn't split the packet above, make a copy now */
142 if (*newNbl
== NULL
) {
143 *newNbl
= OvsPartialCopyNBL(switchContext
, curNbl
, 0, headRoom
,
145 if (*newNbl
== NULL
) {
146 OVS_LOG_ERROR("Unable to copy NBL");
147 return NDIS_STATUS_FAILURE
;
152 for (curNb
= NET_BUFFER_LIST_FIRST_NB(curNbl
); curNb
!= NULL
;
153 curNb
= curNb
->Next
) {
154 status
= NdisRetreatNetBufferDataStart(curNb
, headRoom
, 0, NULL
);
155 if (status
!= NDIS_STATUS_SUCCESS
) {
159 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
160 bufferStart
= (PUINT8
)MmGetSystemAddressForMdlSafe(curMdl
, LowPagePriority
);
162 status
= NDIS_STATUS_RESOURCES
;
166 bufferStart
+= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
167 if (NET_BUFFER_NEXT_NB(curNb
)) {
168 OVS_LOG_TRACE("nb length %u next %u", NET_BUFFER_DATA_LENGTH(curNb
),
169 NET_BUFFER_DATA_LENGTH(curNb
->Next
));
173 ethHdr
= (EthHdr
*)bufferStart
;
174 NdisMoveMemory(ethHdr
->Destination
, fwdInfo
->dstMacAddr
,
175 sizeof ethHdr
->Destination
+ sizeof ethHdr
->Source
);
176 ASSERT(((PCHAR
)&fwdInfo
->dstMacAddr
+ sizeof fwdInfo
->dstMacAddr
) ==
177 (PCHAR
)&fwdInfo
->srcMacAddr
);
178 ethHdr
->Type
= htons(ETH_TYPE_IPV4
);
180 // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such,
181 // should we use those values instead? or will they end up being
184 ipHdr
= (IPHdr
*)((PCHAR
)ethHdr
+ sizeof *ethHdr
);
186 ipHdr
->ihl
= sizeof *ipHdr
/ 4;
187 ipHdr
->version
= IPV4
;
189 ipHdr
->tot_len
= htons(NET_BUFFER_DATA_LENGTH(curNb
) - sizeof *ethHdr
);
191 ipHdr
->frag_off
= IP_DF_NBO
;
192 ipHdr
->ttl
= tunKey
->ttl
? tunKey
->ttl
: VXLAN_DEFAULT_TTL
;
193 ipHdr
->protocol
= IPPROTO_UDP
;
194 ASSERT(tunKey
->dst
== fwdInfo
->dstIpAddr
);
195 ASSERT(tunKey
->src
== fwdInfo
->srcIpAddr
|| tunKey
->src
== 0);
196 ipHdr
->saddr
= fwdInfo
->srcIpAddr
;
197 ipHdr
->daddr
= fwdInfo
->dstIpAddr
;
199 ipHdr
->check
= IPChecksum((UINT8
*)ipHdr
, sizeof *ipHdr
, 0);
202 udpHdr
= (UDPHdr
*)((PCHAR
)ipHdr
+ sizeof *ipHdr
);
203 udpHdr
->source
= htons(tunKey
->flow_hash
| 32768);
204 udpHdr
->dest
= VXLAN_UDP_PORT_NBO
;
205 udpHdr
->len
= htons(NET_BUFFER_DATA_LENGTH(curNb
) - headRoom
+
206 sizeof *udpHdr
+ sizeof *vxlanHdr
);
210 vxlanHdr
= (VXLANHdr
*)((PCHAR
)udpHdr
+ sizeof *udpHdr
);
211 vxlanHdr
->flags1
= 0;
212 vxlanHdr
->locallyReplicate
= 0;
213 vxlanHdr
->flags2
= 0;
214 vxlanHdr
->reserved1
= 0;
215 if (tunKey
->flags
| OVS_TNL_F_KEY
) {
216 vxlanHdr
->vxlanID
= VXLAN_TUNNELID_TO_VNI(tunKey
->tunnelId
);
217 vxlanHdr
->instanceID
= 1;
219 vxlanHdr
->reserved2
= 0;
221 return STATUS_SUCCESS
;
224 OvsCompleteNBL(switchContext
, *newNbl
, TRUE
);
231 *----------------------------------------------------------------------------
233 * Encapsulates the packet if L2/L3 for destination resolves. Otherwise,
234 * enqueues a callback that does encapsulatation after resolution.
235 *----------------------------------------------------------------------------
238 OvsEncapVxlan(PNET_BUFFER_LIST curNbl
,
239 OvsIPv4TunnelKey
*tunKey
,
240 POVS_SWITCH_CONTEXT switchContext
,
241 VOID
*completionList
,
242 POVS_PACKET_HDR_INFO layers
,
243 PNET_BUFFER_LIST
*newNbl
)
246 OVS_FWD_INFO fwdInfo
;
247 UNREFERENCED_PARAMETER(completionList
);
249 status
= OvsLookupIPFwdInfo(tunKey
->dst
, &fwdInfo
);
250 if (status
!= STATUS_SUCCESS
) {
251 OvsFwdIPHelperRequest(NULL
, 0, tunKey
, NULL
, NULL
, NULL
);
252 // return NDIS_STATUS_PENDING;
254 * XXX: Don't know if the completionList will make any sense when
255 * accessed in the callback. Make sure the caveats are known.
257 * XXX: This code will work once we are able to grab locks in the
260 return NDIS_STATUS_FAILURE
;
263 return OvsDoEncapVxlan(curNbl
, tunKey
, &fwdInfo
, layers
,
264 switchContext
, newNbl
);
269 *----------------------------------------------------------------------------
270 * OvsIpHlprCbVxlan --
271 * Callback function for IP helper.
272 * XXX: not used currently
273 *----------------------------------------------------------------------------
276 OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl
,
278 OvsIPv4TunnelKey
*tunKey
,
282 POVS_FWD_INFO fwdInfo
)
284 OVS_PACKET_HDR_INFO layers
;
287 UNREFERENCED_PARAMETER(inPort
);
289 status
= OvsExtractFlow(curNbl
, inPort
, &key
, &layers
, NULL
);
290 if (result
== STATUS_SUCCESS
) {
291 status
= OvsDoEncapVxlan(curNbl
, tunKey
, fwdInfo
, &layers
,
292 (POVS_SWITCH_CONTEXT
)cbData1
, NULL
);
294 status
= NDIS_STATUS_FAILURE
;
297 if (status
!= NDIS_STATUS_SUCCESS
) {
298 // XXX: Free up the NBL;
302 OvsLookupFlowOutput((POVS_SWITCH_CONTEXT
)cbData1
, cbData2
, curNbl
);
306 *----------------------------------------------------------------------------
307 * OvsCalculateUDPChecksum
308 * Calculate UDP checksum
309 *----------------------------------------------------------------------------
311 static __inline NDIS_STATUS
312 OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl
,
318 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo
;
321 csumInfo
.Value
= NET_BUFFER_LIST_INFO(curNbl
, TcpIpChecksumNetBufferListInfo
);
323 /* Next check if UDP checksum has been calculated. */
324 if (!csumInfo
.Receive
.UdpChecksumSucceeded
) {
327 checkSum
= udpHdr
->check
;
329 l4Payload
= packetLength
- sizeof(EthHdr
) - ipHdr
->ihl
* 4;
332 IPPseudoChecksum((UINT32
*)&ipHdr
->saddr
,
333 (UINT32
*)&ipHdr
->daddr
,
334 IPPROTO_UDP
, (UINT16
)l4Payload
);
335 udpHdr
->check
= CalculateChecksumNB(curNb
, (UINT16
)l4Payload
,
336 sizeof(EthHdr
) + ipHdr
->ihl
* 4);
337 if (checkSum
!= udpHdr
->check
) {
338 OVS_LOG_TRACE("UDP checksum incorrect.");
339 return NDIS_STATUS_INVALID_PACKET
;
343 csumInfo
.Receive
.UdpChecksumSucceeded
= 1;
344 NET_BUFFER_LIST_INFO(curNbl
, TcpIpChecksumNetBufferListInfo
) = csumInfo
.Value
;
345 return NDIS_STATUS_SUCCESS
;
349 *----------------------------------------------------------------------------
351 * Decapsulates to tunnel header in 'curNbl' and puts into 'tunKey'.
352 *----------------------------------------------------------------------------
355 OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext
,
356 PNET_BUFFER_LIST curNbl
,
357 OvsIPv4TunnelKey
*tunKey
,
358 PNET_BUFFER_LIST
*newNbl
)
366 UINT32 tunnelSize
= 0, packetLength
= 0;
370 /* Check the the length of the UDP payload */
371 curNb
= NET_BUFFER_LIST_FIRST_NB(curNbl
);
372 packetLength
= NET_BUFFER_DATA_LENGTH(curNb
);
373 tunnelSize
= OvsGetVxlanTunHdrSize();
374 if (packetLength
<= tunnelSize
) {
375 return NDIS_STATUS_INVALID_LENGTH
;
379 * Create a copy of the NBL so that we have all the headers in one MDL.
381 *newNbl
= OvsPartialCopyNBL(switchContext
, curNbl
,
382 tunnelSize
+ OVS_DEFAULT_COPY_SIZE
, 0,
383 TRUE
/*copy NBL info */);
385 if (*newNbl
== NULL
) {
386 return NDIS_STATUS_RESOURCES
;
389 /* XXX: Handle VLAN header. */
391 curNb
= NET_BUFFER_LIST_FIRST_NB(curNbl
);
392 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
393 bufferStart
= (PUINT8
)MmGetSystemAddressForMdlSafe(curMdl
, LowPagePriority
) +
394 NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
396 status
= NDIS_STATUS_RESOURCES
;
400 ethHdr
= (EthHdr
*)bufferStart
;
401 /* XXX: Handle IP options. */
402 ipHdr
= (IPHdr
*)((PCHAR
)ethHdr
+ sizeof *ethHdr
);
403 tunKey
->src
= ipHdr
->saddr
;
404 tunKey
->dst
= ipHdr
->daddr
;
405 tunKey
->tos
= ipHdr
->tos
;
406 tunKey
->ttl
= ipHdr
->ttl
;
408 udpHdr
= (UDPHdr
*)((PCHAR
)ipHdr
+ sizeof *ipHdr
);
410 /* Validate if NIC has indicated checksum failure. */
411 status
= OvsValidateUDPChecksum(curNbl
, udpHdr
->check
== 0);
412 if (status
!= NDIS_STATUS_SUCCESS
) {
416 /* Calculate and verify UDP checksum if NIC didn't do it. */
417 if (udpHdr
->check
!= 0) {
418 status
= OvsCalculateUDPChecksum(curNbl
, curNb
, ipHdr
, udpHdr
, packetLength
);
419 if (status
!= NDIS_STATUS_SUCCESS
) {
424 vxlanHdr
= (VXLANHdr
*)((PCHAR
)udpHdr
+ sizeof *udpHdr
);
425 if (vxlanHdr
->instanceID
) {
426 tunKey
->flags
= OVS_TNL_F_KEY
;
427 tunKey
->tunnelId
= VXLAN_VNI_TO_TUNNELID(vxlanHdr
->vxlanID
);
430 tunKey
->tunnelId
= 0;
433 /* Clear out the receive flag for the inner packet. */
434 NET_BUFFER_LIST_INFO(curNbl
, TcpIpChecksumNetBufferListInfo
) = 0;
435 NdisAdvanceNetBufferDataStart(curNb
, tunnelSize
, FALSE
, NULL
);
436 return NDIS_STATUS_SUCCESS
;
439 OvsCompleteNBL(switchContext
, *newNbl
, TRUE
);
446 OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet
,
447 OvsIPv4TunnelKey
*tunnelKey
)
449 NDIS_STATUS status
= NDIS_STATUS_FAILURE
;
452 VXLANHdr
*VxlanHeader
;
453 VXLANHdr VxlanHeaderBuffer
;
454 struct IPHdr ip_storage
;
455 const struct IPHdr
*nh
;
456 OVS_PACKET_HDR_INFO layers
;
461 nh
= OvsGetIp(packet
, layers
.l3Offset
, &ip_storage
);
463 layers
.l4Offset
= layers
.l3Offset
+ nh
->ihl
* 4;
468 /* make sure it's a VXLAN packet */
469 udp
= OvsGetUdp(packet
, layers
.l4Offset
, &udpStorage
);
471 layers
.l7Offset
= layers
.l4Offset
+ sizeof *udp
;
476 /* XXX Should be tested against the dynamic port # in the VXLAN vport */
477 ASSERT(udp
->dest
== RtlUshortByteSwap(VXLAN_UDP_PORT
));
479 VxlanHeader
= (VXLANHdr
*)OvsGetPacketBytes(packet
,
480 sizeof(*VxlanHeader
),
485 tunnelKey
->src
= nh
->saddr
;
486 tunnelKey
->dst
= nh
->daddr
;
487 tunnelKey
->ttl
= nh
->ttl
;
488 tunnelKey
->tos
= nh
->tos
;
489 if (VxlanHeader
->instanceID
) {
490 tunnelKey
->flags
= OVS_TNL_F_KEY
;
491 tunnelKey
->tunnelId
= VXLAN_VNI_TO_TUNNELID(VxlanHeader
->vxlanID
);
493 tunnelKey
->flags
= 0;
494 tunnelKey
->tunnelId
= 0;
499 status
= NDIS_STATUS_SUCCESS
;
506 #pragma warning( pop )