]> git.proxmox.com Git - mirror_ovs.git/blob - datapath-windows/ovsext/Geneve.c
210716d5aba33676e8221f957285cc8ca8a463f1
[mirror_ovs.git] / datapath-windows / ovsext / Geneve.c
1 /*
2 * Copyright (c) 2016 VMware, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "precomp.h"
18
19 #include "Atomic.h"
20 #include "Debug.h"
21 #include "Flow.h"
22 #include "IpHelper.h"
23 #include "Jhash.h"
24 #include "NetProto.h"
25 #include "Offload.h"
26 #include "PacketIO.h"
27 #include "PacketParser.h"
28 #include "Geneve.h"
29 #include "Switch.h"
30 #include "User.h"
31 #include "Util.h"
32 #include "Vport.h"
33
34 #ifdef OVS_DBG_MOD
35 #undef OVS_DBG_MOD
36 #endif
37 #define OVS_DBG_MOD OVS_DBG_GENEVE
38
39
40 NTSTATUS OvsInitGeneveTunnel(POVS_VPORT_ENTRY vport,
41 UINT16 udpDestPort)
42 {
43 POVS_GENEVE_VPORT genevePort;
44
45 genevePort = (POVS_GENEVE_VPORT)
46 OvsAllocateMemoryWithTag(sizeof(*genevePort), OVS_GENEVE_POOL_TAG);
47 if (!genevePort) {
48 OVS_LOG_ERROR("Insufficient memory, can't allocate GENEVE_VPORT");
49 return STATUS_INSUFFICIENT_RESOURCES;
50 }
51
52 RtlZeroMemory(genevePort, sizeof(*genevePort));
53 genevePort->dstPort = udpDestPort;
54 vport->priv = (PVOID) genevePort;
55 return STATUS_SUCCESS;
56 }
57
58 VOID
59 OvsCleanupGeneveTunnel(POVS_VPORT_ENTRY vport)
60 {
61 if (vport->ovsType != OVS_VPORT_TYPE_GENEVE ||
62 vport->priv == NULL) {
63 return;
64 }
65
66 OvsFreeMemoryWithTag(vport->priv, OVS_GENEVE_POOL_TAG);
67 vport->priv = NULL;
68 }
69
70 NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport,
71 PNET_BUFFER_LIST curNbl,
72 OvsIPv4TunnelKey *tunKey,
73 POVS_SWITCH_CONTEXT switchContext,
74 POVS_PACKET_HDR_INFO layers,
75 PNET_BUFFER_LIST *newNbl,
76 POVS_FWD_INFO switchFwdInfo)
77 {
78 NTSTATUS status;
79 OVS_FWD_INFO fwdInfo;
80 PNET_BUFFER curNb;
81 PMDL curMdl;
82 PUINT8 bufferStart;
83 EthHdr *ethHdr;
84 IPHdr *ipHdr;
85 UDPHdr *udpHdr;
86 GeneveHdr *geneveHdr;
87 GeneveOptionHdr *optHdr;
88 POVS_GENEVE_VPORT vportGeneve;
89 UINT32 headRoom = OvsGetGeneveTunHdrMinSize() + tunKey->tunOptLen;
90 UINT32 packetLength;
91 ULONG mss = 0;
92 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
93
94 status = OvsLookupIPFwdInfo(tunKey->src, tunKey->dst, &fwdInfo);
95 if (status != STATUS_SUCCESS) {
96 OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
97 // return NDIS_STATUS_PENDING;
98 /*
99 * XXX: Don't know if the completionList will make any sense when
100 * accessed in the callback. Make sure the caveats are known.
101 *
102 * XXX: This code will work once we are able to grab locks in the
103 * callback.
104 */
105 return NDIS_STATUS_FAILURE;
106 }
107
108 RtlCopyMemory(switchFwdInfo->value, fwdInfo.value, sizeof fwdInfo.value);
109
110 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
111 packetLength = NET_BUFFER_DATA_LENGTH(curNb);
112
113 if (layers->isTcp) {
114 mss = OVSGetTcpMSS(curNbl);
115
116 OVS_LOG_TRACE("MSS %u packet len %u", mss,
117 packetLength);
118 if (mss) {
119 OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
120 *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
121 mss, headRoom, FALSE);
122 if (*newNbl == NULL) {
123 OVS_LOG_ERROR("Unable to segment NBL");
124 return NDIS_STATUS_FAILURE;
125 }
126 /* Clear out LSO flags after this point */
127 NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0;
128 }
129 }
130
131 vportGeneve = (POVS_GENEVE_VPORT) GetOvsVportPriv(vport);
132 ASSERT(vportGeneve != NULL);
133
134 /* If we didn't split the packet above, make a copy now */
135 if (*newNbl == NULL) {
136 *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
137 FALSE /*NBL info*/);
138 if (*newNbl == NULL) {
139 OVS_LOG_ERROR("Unable to copy NBL");
140 return NDIS_STATUS_FAILURE;
141 }
142 csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
143 TcpIpChecksumNetBufferListInfo);
144 status = OvsApplySWChecksumOnNB(layers, *newNbl, &csumInfo);
145
146 if (status != NDIS_STATUS_SUCCESS) {
147 goto ret_error;
148 }
149 }
150
151 curNbl = *newNbl;
152 for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
153 curNb = curNb->Next) {
154 status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
155 if (status != NDIS_STATUS_SUCCESS) {
156 goto ret_error;
157 }
158
159 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
160 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
161 LowPagePriority);
162 if (!bufferStart) {
163 status = NDIS_STATUS_RESOURCES;
164 goto ret_error;
165 }
166
167 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
168 if (NET_BUFFER_NEXT_NB(curNb)) {
169 OVS_LOG_TRACE("nb length %u next %u",
170 NET_BUFFER_DATA_LENGTH(curNb),
171 NET_BUFFER_DATA_LENGTH(curNb->Next));
172 }
173
174 /* L2 header */
175 ethHdr = (EthHdr *)bufferStart;
176 NdisMoveMemory(ethHdr->Destination, fwdInfo.dstMacAddr,
177 sizeof ethHdr->Destination);
178 NdisMoveMemory(ethHdr->Source, fwdInfo.srcMacAddr,
179 sizeof ethHdr->Source);
180 ethHdr->Type = htons(ETH_TYPE_IPV4);
181
182 /* IP header */
183 ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
184
185 ipHdr->ihl = sizeof *ipHdr / 4;
186 ipHdr->version = IPPROTO_IPV4;
187 ipHdr->tos = tunKey->tos;
188 ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
189 ipHdr->id = (uint16)atomic_add64(&vportGeneve->ipId,
190 NET_BUFFER_DATA_LENGTH(curNb));
191 ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
192 IP_DF_NBO : 0;
193 ipHdr->ttl = tunKey->ttl ? tunKey->ttl : GENEVE_DEFAULT_TTL;
194 ipHdr->protocol = IPPROTO_UDP;
195 ASSERT(tunKey->dst == fwdInfo.dstIpAddr);
196 ASSERT(tunKey->src == fwdInfo.srcIpAddr || tunKey->src == 0);
197 ipHdr->saddr = fwdInfo.srcIpAddr;
198 ipHdr->daddr = fwdInfo.dstIpAddr;
199 ipHdr->check = 0;
200
201 /* UDP header */
202 udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
203 udpHdr->source = htons(tunKey->flow_hash | MAXINT16);
204 udpHdr->dest = tunKey->dst_port ? tunKey->dst_port :
205 htons(vportGeneve->dstPort);
206 udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom +
207 sizeof *udpHdr + sizeof *geneveHdr +
208 tunKey->tunOptLen);
209 if (tunKey->flags & OVS_TNL_F_CSUM) {
210 UINT16 udpChksumLen = (UINT16) NET_BUFFER_DATA_LENGTH(curNb) -
211 sizeof *ipHdr - sizeof *ethHdr;
212 udpHdr->check = IPPseudoChecksum(&ipHdr->saddr, &ipHdr->daddr,
213 IPPROTO_UDP, udpChksumLen);
214 } else {
215 udpHdr->check = 0;
216 }
217 /* Geneve header */
218 geneveHdr = (GeneveHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
219 geneveHdr->version = GENEVE_VER;
220 geneveHdr->optLen = tunKey->tunOptLen / 4;
221 geneveHdr->oam = !!(tunKey->flags & OVS_TNL_F_OAM);
222 geneveHdr->critical = !!(tunKey->flags & OVS_TNL_F_CRT_OPT);
223 geneveHdr->reserved1 = 0;
224 geneveHdr->protocol = ETH_P_TEB_NBO;
225 geneveHdr->vni = GENEVE_TUNNELID_TO_VNI(tunKey->tunnelId);
226 geneveHdr->reserved2 = 0;
227
228 /* Geneve header options */
229 optHdr = (GeneveOptionHdr *)(geneveHdr + 1);
230 memcpy(optHdr, TunnelKeyGetOptions(tunKey), tunKey->tunOptLen);
231
232 csumInfo.Value = 0;
233 csumInfo.Transmit.IpHeaderChecksum = 1;
234 csumInfo.Transmit.IsIPv4 = 1;
235 if (tunKey->flags & OVS_TNL_F_CSUM) {
236 csumInfo.Transmit.UdpChecksum = 1;
237 }
238 NET_BUFFER_LIST_INFO(curNbl,
239 TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
240 }
241 return STATUS_SUCCESS;
242
243 ret_error:
244 OvsCompleteNBL(switchContext, *newNbl, TRUE);
245 *newNbl = NULL;
246 return status;
247 }
248
249 NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext,
250 PNET_BUFFER_LIST curNbl,
251 OvsIPv4TunnelKey *tunKey,
252 PNET_BUFFER_LIST *newNbl)
253 {
254 PNET_BUFFER curNb;
255 PMDL curMdl;
256 EthHdr *ethHdr;
257 IPHdr *ipHdr;
258 UDPHdr *udpHdr;
259 GeneveHdr *geneveHdr;
260 UINT32 tunnelSize;
261 UINT32 packetLength;
262 PUINT8 bufferStart;
263 PVOID optStart;
264 NDIS_STATUS status;
265 OVS_PACKET_HDR_INFO layers = { 0 };
266
267 status = OvsExtractLayers(curNbl, &layers);
268 if (status != NDIS_STATUS_SUCCESS) {
269 return status;
270 }
271
272 /* Check the length of the UDP payload */
273 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
274 tunnelSize = OvsGetGeneveTunHdrSizeFromLayers(&layers);
275 packetLength = NET_BUFFER_DATA_LENGTH(curNb);
276 if (packetLength <= tunnelSize) {
277 return NDIS_STATUS_INVALID_LENGTH;
278 }
279
280 /*
281 * Create a copy of the NBL so that we have all the headers in one MDL.
282 */
283 *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
284 tunnelSize, 0,
285 TRUE /*copy NBL info */);
286
287 if (*newNbl == NULL) {
288 return NDIS_STATUS_RESOURCES;
289 }
290
291 /* XXX: Handle VLAN header. */
292 curNbl = *newNbl;
293 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
294 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
295 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority)
296 + NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
297 if (!bufferStart) {
298 status = NDIS_STATUS_RESOURCES;
299 goto dropNbl;
300 }
301
302 ethHdr = (EthHdr *)bufferStart;
303 /* XXX: Handle IP options. */
304 ipHdr = (IPHdr *)(bufferStart + layers.l3Offset);
305 tunKey->src = ipHdr->saddr;
306 tunKey->dst = ipHdr->daddr;
307 tunKey->tos = ipHdr->tos;
308 tunKey->ttl = ipHdr->ttl;
309 tunKey->pad = 0;
310 udpHdr = (UDPHdr *)(bufferStart + layers.l4Offset);
311
312 /* Validate if NIC has indicated checksum failure. */
313 status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0);
314 if (status != NDIS_STATUS_SUCCESS) {
315 goto dropNbl;
316 }
317
318 /* Calculate and verify UDP checksum if NIC didn't do it. */
319 if (udpHdr->check != 0) {
320 status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr,
321 packetLength, &layers);
322 tunKey->flags |= OVS_TNL_F_CSUM;
323 if (status != NDIS_STATUS_SUCCESS) {
324 goto dropNbl;
325 }
326 }
327
328 geneveHdr = (GeneveHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
329 if (geneveHdr->protocol != ETH_P_TEB_NBO) {
330 status = STATUS_NDIS_INVALID_PACKET;
331 goto dropNbl;
332 }
333 /* Update tunnelKey flags. */
334 tunKey->flags = OVS_TNL_F_KEY | (geneveHdr->oam ? OVS_TNL_F_OAM : 0) |
335 (geneveHdr->critical ? OVS_TNL_F_CRT_OPT : 0);
336
337 tunKey->tunnelId = GENEVE_VNI_TO_TUNNELID(geneveHdr->vni);
338 tunKey->tunOptLen = (uint8)geneveHdr->optLen * 4;
339 if (tunKey->tunOptLen > TUN_OPT_MAX_LEN ||
340 packetLength < tunnelSize + tunKey->tunOptLen) {
341 status = NDIS_STATUS_INVALID_LENGTH;
342 goto dropNbl;
343 }
344 /* Clear out the receive flag for the inner packet. */
345 NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
346
347 NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL);
348 if (tunKey->tunOptLen > 0) {
349 optStart = NdisGetDataBuffer(curNb, tunKey->tunOptLen,
350 TunnelKeyGetOptions(tunKey), 1, 0);
351
352 /* If data is contiguous in the buffer, NdisGetDataBuffer will not copy
353 data to the storage. Manual copy is needed. */
354 if (optStart != TunnelKeyGetOptions(tunKey)) {
355 memcpy(TunnelKeyGetOptions(tunKey), optStart, tunKey->tunOptLen);
356 }
357 NdisAdvanceNetBufferDataStart(curNb, tunKey->tunOptLen, FALSE, NULL);
358 tunKey->flags |= OVS_TNL_F_GENEVE_OPT;
359 }
360
361 return NDIS_STATUS_SUCCESS;
362
363 dropNbl:
364 OvsCompleteNBL(switchContext, *newNbl, TRUE);
365 *newNbl = NULL;
366 return status;
367 }