git.proxmox.com Git - mirror_ovs.git/blob - datapath-windows/ovsext/Vxlan.c
datapath-windows: fix OVS_VPORT_TYPE
[mirror_ovs.git] / datapath-windows / ovsext / Vxlan.c
1 /*
2 * Copyright (c) 2014 VMware, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "precomp.h"
18 #include "NetProto.h"
19 #include "Switch.h"
20 #include "Vport.h"
21 #include "Flow.h"
22 #include "Vxlan.h"
23 #include "IpHelper.h"
24 #include "Checksum.h"
25 #include "User.h"
26 #include "PacketIO.h"
27 #include "Flow.h"
28 #include "PacketParser.h"
29 #include "Checksum.h"
30
31 #pragma warning( push )
32 #pragma warning( disable:4127 )
33
34
35 #ifdef OVS_DBG_MOD
36 #undef OVS_DBG_MOD
37 #endif
38 #define OVS_DBG_MOD OVS_DBG_VXLAN
39 #include "Debug.h"
40
/*
 * VXLAN helper macros and constants.
 *
 * VXLAN_ID_IS_VALID      - true when the ID is non-zero and fits in 24 bits.
 *                          Evaluates its argument twice; do not pass an
 *                          expression with side effects.
 * VXLAN_TUNNELID_TO_VNI  - extracts the VNI from a 64-bit tunnel ID by
 *                          shifting it right by 40 bits.  The expansion is
 *                          fully parenthesized so that it is safe next to
 *                          'sizeof' and postfix operators.
 * VXLAN_VNI_TO_TUNNELID  - inverse of the above: widens the VNI to 64 bits
 *                          and shifts it left by 40 bits.
 * NOTE(review): the 40-bit shift presumably accounts for the tunnel ID being
 * kept in network byte order -- confirm against the tunnel key definition.
 */
#define VXLAN_ID_IS_VALID(vxlanID) (0 < (vxlanID) && (vxlanID) <= 0xffffff)
#define VXLAN_TUNNELID_TO_VNI(_tID) ((UINT32)(((UINT64)(_tID)) >> 40))
#define VXLAN_VNI_TO_TUNNELID(_vni) (((UINT64)(_vni)) << 40)
/* Value written to the outer IPv4 frag_off field by the encap path. */
#define IP_DF_NBO 0x0040
/* TTL used for the outer IPv4 header when the tunnel key carries none. */
#define VXLAN_DEFAULT_TTL 64
#define VXLAN_MULTICAST_TTL 64
#define VXLAN_DEFAULT_INSTANCE_ID 1
49
50 /* Move to a header file */
51 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
52
53 NTSTATUS
54 OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport,
55 POVS_VPORT_ADD_REQUEST addReq)
56 {
57 POVS_VXLAN_VPORT vxlanPort;
58 NTSTATUS status = STATUS_SUCCESS;
59
60 ASSERT(addReq->type == OVS_VPORT_TYPE_VXLAN);
61
62 vxlanPort = OvsAllocateMemory(sizeof (*vxlanPort));
63 if (vxlanPort == NULL) {
64 status = STATUS_INSUFFICIENT_RESOURCES;
65 } else {
66 RtlZeroMemory(vxlanPort, sizeof (*vxlanPort));
67 vxlanPort->dstPort = addReq->dstPort;
68 /*
69 * since we are installing the WFP filter before the port is created
70 * We need to check if it is the same number
71 * XXX should be removed later
72 */
73 ASSERT(vxlanPort->dstPort == VXLAN_UDP_PORT);
74 vport->priv = (PVOID)vxlanPort;
75 }
76 return status;
77 }
78
79
80 VOID
81 OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport)
82 {
83 if (vport->ovsType != OVS_VPORT_TYPE_VXLAN ||
84 vport->priv == NULL) {
85 return;
86 }
87
88 OvsFreeMemory(vport->priv);
89 vport->priv = NULL;
90 }
91
92
93 /*
94 *----------------------------------------------------------------------------
95 * OvsDoEncapVxlan
96 * Encapsulates the packet.
97 *----------------------------------------------------------------------------
98 */
99 static __inline NDIS_STATUS
100 OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
101 OvsIPv4TunnelKey *tunKey,
102 POVS_FWD_INFO fwdInfo,
103 POVS_PACKET_HDR_INFO layers,
104 POVS_SWITCH_CONTEXT switchContext,
105 PNET_BUFFER_LIST *newNbl)
106 {
107 NDIS_STATUS status;
108 PNET_BUFFER curNb;
109 PMDL curMdl;
110 PUINT8 bufferStart;
111 EthHdr *ethHdr;
112 IPHdr *ipHdr;
113 UDPHdr *udpHdr;
114 VXLANHdr *vxlanHdr;
115 UINT32 headRoom = OvsGetVxlanTunHdrSize();
116 UINT32 packetLength;
117
118 /*
119 * XXX: the assumption currently is that the NBL is owned by OVS, and
120 * headroom has already been allocated as part of allocating the NBL and
121 * MDL.
122 */
123 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
124 packetLength = NET_BUFFER_DATA_LENGTH(curNb);
125 if (layers->isTcp) {
126 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
127
128 tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
129 TcpLargeSendNetBufferListInfo);
130 OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, packetLength);
131 if (tsoInfo.LsoV1Transmit.MSS) {
132 OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
133 *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
134 tsoInfo.LsoV1Transmit.MSS, headRoom);
135 if (*newNbl == NULL) {
136 OVS_LOG_ERROR("Unable to segment NBL");
137 return NDIS_STATUS_FAILURE;
138 }
139 }
140 }
141 /* If we didn't split the packet above, make a copy now */
142 if (*newNbl == NULL) {
143 *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
144 FALSE /*NBL info*/);
145 if (*newNbl == NULL) {
146 OVS_LOG_ERROR("Unable to copy NBL");
147 return NDIS_STATUS_FAILURE;
148 }
149 }
150
151 curNbl = *newNbl;
152 for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
153 curNb = curNb->Next) {
154 status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
155 if (status != NDIS_STATUS_SUCCESS) {
156 goto ret_error;
157 }
158
159 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
160 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
161 if (!bufferStart) {
162 status = NDIS_STATUS_RESOURCES;
163 goto ret_error;
164 }
165
166 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
167 if (NET_BUFFER_NEXT_NB(curNb)) {
168 OVS_LOG_TRACE("nb length %u next %u", NET_BUFFER_DATA_LENGTH(curNb),
169 NET_BUFFER_DATA_LENGTH(curNb->Next));
170 }
171
172 /* L2 header */
173 ethHdr = (EthHdr *)bufferStart;
174 NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
175 sizeof ethHdr->Destination + sizeof ethHdr->Source);
176 ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
177 (PCHAR)&fwdInfo->srcMacAddr);
178 ethHdr->Type = htons(ETH_TYPE_IPV4);
179
180 // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such,
181 // should we use those values instead? or will they end up being
182 // uninitialized;
183 /* IP header */
184 ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
185
186 ipHdr->ihl = sizeof *ipHdr / 4;
187 ipHdr->version = IPV4;
188 ipHdr->tos = 0;
189 ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
190 ipHdr->id = 0;
191 ipHdr->frag_off = IP_DF_NBO;
192 ipHdr->ttl = tunKey->ttl ? tunKey->ttl : VXLAN_DEFAULT_TTL;
193 ipHdr->protocol = IPPROTO_UDP;
194 ASSERT(tunKey->dst == fwdInfo->dstIpAddr);
195 ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0);
196 ipHdr->saddr = fwdInfo->srcIpAddr;
197 ipHdr->daddr = fwdInfo->dstIpAddr;
198 ipHdr->check = 0;
199 ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0);
200
201 /* UDP header */
202 udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
203 udpHdr->source = htons(tunKey->flow_hash | 32768);
204 udpHdr->dest = VXLAN_UDP_PORT_NBO;
205 udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom +
206 sizeof *udpHdr + sizeof *vxlanHdr);
207 udpHdr->check = 0;
208
209 /* VXLAN header */
210 vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
211 vxlanHdr->flags1 = 0;
212 vxlanHdr->locallyReplicate = 0;
213 vxlanHdr->flags2 = 0;
214 vxlanHdr->reserved1 = 0;
215 if (tunKey->flags | OVS_TNL_F_KEY) {
216 vxlanHdr->vxlanID = VXLAN_TUNNELID_TO_VNI(tunKey->tunnelId);
217 vxlanHdr->instanceID = 1;
218 }
219 vxlanHdr->reserved2 = 0;
220 }
221 return STATUS_SUCCESS;
222
223 ret_error:
224 OvsCompleteNBL(switchContext, *newNbl, TRUE);
225 *newNbl = NULL;
226 return status;
227 }
228
229
230 /*
231 *----------------------------------------------------------------------------
232 * OvsEncapVxlan --
233 * Encapsulates the packet if L2/L3 for destination resolves. Otherwise,
234 * enqueues a callback that does encapsulatation after resolution.
235 *----------------------------------------------------------------------------
236 */
237 NDIS_STATUS
238 OvsEncapVxlan(PNET_BUFFER_LIST curNbl,
239 OvsIPv4TunnelKey *tunKey,
240 POVS_SWITCH_CONTEXT switchContext,
241 VOID *completionList,
242 POVS_PACKET_HDR_INFO layers,
243 PNET_BUFFER_LIST *newNbl)
244 {
245 NTSTATUS status;
246 OVS_FWD_INFO fwdInfo;
247 UNREFERENCED_PARAMETER(completionList);
248
249 status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
250 if (status != STATUS_SUCCESS) {
251 OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
252 // return NDIS_STATUS_PENDING;
253 /*
254 * XXX: Don't know if the completionList will make any sense when
255 * accessed in the callback. Make sure the caveats are known.
256 *
257 * XXX: This code will work once we are able to grab locks in the
258 * callback.
259 */
260 return NDIS_STATUS_FAILURE;
261 }
262
263 return OvsDoEncapVxlan(curNbl, tunKey, &fwdInfo, layers,
264 switchContext, newNbl);
265 }
266
267
268 /*
269 *----------------------------------------------------------------------------
270 * OvsIpHlprCbVxlan --
271 * Callback function for IP helper.
272 * XXX: not used currently
273 *----------------------------------------------------------------------------
274 */
275 static VOID
276 OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl,
277 UINT32 inPort,
278 OvsIPv4TunnelKey *tunKey,
279 PVOID cbData1,
280 PVOID cbData2,
281 NTSTATUS result,
282 POVS_FWD_INFO fwdInfo)
283 {
284 OVS_PACKET_HDR_INFO layers;
285 OvsFlowKey key;
286 NDIS_STATUS status;
287 UNREFERENCED_PARAMETER(inPort);
288
289 status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL);
290 if (result == STATUS_SUCCESS) {
291 status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers,
292 (POVS_SWITCH_CONTEXT)cbData1, NULL);
293 } else {
294 status = NDIS_STATUS_FAILURE;
295 }
296
297 if (status != NDIS_STATUS_SUCCESS) {
298 // XXX: Free up the NBL;
299 return;
300 }
301
302 OvsLookupFlowOutput((POVS_SWITCH_CONTEXT)cbData1, cbData2, curNbl);
303 }
304
305 /*
306 *----------------------------------------------------------------------------
307 * OvsCalculateUDPChecksum
308 * Calculate UDP checksum
309 *----------------------------------------------------------------------------
310 */
311 static __inline NDIS_STATUS
312 OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl,
313 PNET_BUFFER curNb,
314 IPHdr *ipHdr,
315 UDPHdr *udpHdr,
316 UINT32 packetLength)
317 {
318 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
319 UINT16 checkSum;
320
321 csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo);
322
323 /* Next check if UDP checksum has been calculated. */
324 if (!csumInfo.Receive.UdpChecksumSucceeded) {
325 UINT32 l4Payload;
326
327 checkSum = udpHdr->check;
328
329 l4Payload = packetLength - sizeof(EthHdr) - ipHdr->ihl * 4;
330 udpHdr->check = 0;
331 udpHdr->check =
332 IPPseudoChecksum((UINT32 *)&ipHdr->saddr,
333 (UINT32 *)&ipHdr->daddr,
334 IPPROTO_UDP, (UINT16)l4Payload);
335 udpHdr->check = CalculateChecksumNB(curNb, (UINT16)l4Payload,
336 sizeof(EthHdr) + ipHdr->ihl * 4);
337 if (checkSum != udpHdr->check) {
338 OVS_LOG_TRACE("UDP checksum incorrect.");
339 return NDIS_STATUS_INVALID_PACKET;
340 }
341 }
342
343 csumInfo.Receive.UdpChecksumSucceeded = 1;
344 NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
345 return NDIS_STATUS_SUCCESS;
346 }
347
348 /*
349 *----------------------------------------------------------------------------
350 * OvsDoDecapVxlan
351 * Decapsulates to tunnel header in 'curNbl' and puts into 'tunKey'.
352 *----------------------------------------------------------------------------
353 */
354 NDIS_STATUS
355 OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext,
356 PNET_BUFFER_LIST curNbl,
357 OvsIPv4TunnelKey *tunKey,
358 PNET_BUFFER_LIST *newNbl)
359 {
360 PNET_BUFFER curNb;
361 PMDL curMdl;
362 EthHdr *ethHdr;
363 IPHdr *ipHdr;
364 UDPHdr *udpHdr;
365 VXLANHdr *vxlanHdr;
366 UINT32 tunnelSize = 0, packetLength = 0;
367 PUINT8 bufferStart;
368 NDIS_STATUS status;
369
370 /* Check the the length of the UDP payload */
371 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
372 packetLength = NET_BUFFER_DATA_LENGTH(curNb);
373 tunnelSize = OvsGetVxlanTunHdrSize();
374 if (packetLength <= tunnelSize) {
375 return NDIS_STATUS_INVALID_LENGTH;
376 }
377
378 /*
379 * Create a copy of the NBL so that we have all the headers in one MDL.
380 */
381 *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
382 tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
383 TRUE /*copy NBL info */);
384
385 if (*newNbl == NULL) {
386 return NDIS_STATUS_RESOURCES;
387 }
388
389 /* XXX: Handle VLAN header. */
390 curNbl = *newNbl;
391 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
392 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
393 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) +
394 NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
395 if (!bufferStart) {
396 status = NDIS_STATUS_RESOURCES;
397 goto dropNbl;
398 }
399
400 ethHdr = (EthHdr *)bufferStart;
401 /* XXX: Handle IP options. */
402 ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
403 tunKey->src = ipHdr->saddr;
404 tunKey->dst = ipHdr->daddr;
405 tunKey->tos = ipHdr->tos;
406 tunKey->ttl = ipHdr->ttl;
407 tunKey->pad = 0;
408 udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
409
410 /* Validate if NIC has indicated checksum failure. */
411 status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0);
412 if (status != NDIS_STATUS_SUCCESS) {
413 goto dropNbl;
414 }
415
416 /* Calculate and verify UDP checksum if NIC didn't do it. */
417 if (udpHdr->check != 0) {
418 status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, packetLength);
419 if (status != NDIS_STATUS_SUCCESS) {
420 goto dropNbl;
421 }
422 }
423
424 vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
425 if (vxlanHdr->instanceID) {
426 tunKey->flags = OVS_TNL_F_KEY;
427 tunKey->tunnelId = VXLAN_VNI_TO_TUNNELID(vxlanHdr->vxlanID);
428 } else {
429 tunKey->flags = 0;
430 tunKey->tunnelId = 0;
431 }
432
433 /* Clear out the receive flag for the inner packet. */
434 NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
435 NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL);
436 return NDIS_STATUS_SUCCESS;
437
438 dropNbl:
439 OvsCompleteNBL(switchContext, *newNbl, TRUE);
440 *newNbl = NULL;
441 return status;
442 }
443
444
445 NDIS_STATUS
446 OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
447 OvsIPv4TunnelKey *tunnelKey)
448 {
449 NDIS_STATUS status = NDIS_STATUS_FAILURE;
450 UDPHdr udpStorage;
451 const UDPHdr *udp;
452 VXLANHdr *VxlanHeader;
453 VXLANHdr VxlanHeaderBuffer;
454 struct IPHdr ip_storage;
455 const struct IPHdr *nh;
456 OVS_PACKET_HDR_INFO layers;
457
458 layers.value = 0;
459
460 do {
461 nh = OvsGetIp(packet, layers.l3Offset, &ip_storage);
462 if (nh) {
463 layers.l4Offset = layers.l3Offset + nh->ihl * 4;
464 } else {
465 break;
466 }
467
468 /* make sure it's a VXLAN packet */
469 udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage);
470 if (udp) {
471 layers.l7Offset = layers.l4Offset + sizeof *udp;
472 } else {
473 break;
474 }
475
476 /* XXX Should be tested against the dynamic port # in the VXLAN vport */
477 ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT));
478
479 VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet,
480 sizeof(*VxlanHeader),
481 layers.l7Offset,
482 &VxlanHeaderBuffer);
483
484 if (VxlanHeader) {
485 tunnelKey->src = nh->saddr;
486 tunnelKey->dst = nh->daddr;
487 tunnelKey->ttl = nh->ttl;
488 tunnelKey->tos = nh->tos;
489 if (VxlanHeader->instanceID) {
490 tunnelKey->flags = OVS_TNL_F_KEY;
491 tunnelKey->tunnelId = VXLAN_VNI_TO_TUNNELID(VxlanHeader->vxlanID);
492 } else {
493 tunnelKey->flags = 0;
494 tunnelKey->tunnelId = 0;
495 }
496 } else {
497 break;
498 }
499 status = NDIS_STATUS_SUCCESS;
500
501 } while(FALSE);
502
503 return status;
504 }
505
506 #pragma warning( pop )