/*
 * Copyright (c) 2016 VMware, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
27 #include "PacketParser.h"
37 #define OVS_DBG_MOD OVS_DBG_GENEVE
40 NTSTATUS
OvsInitGeneveTunnel(POVS_VPORT_ENTRY vport
,
43 POVS_GENEVE_VPORT genevePort
;
45 genevePort
= (POVS_GENEVE_VPORT
)
46 OvsAllocateMemoryWithTag(sizeof(*genevePort
), OVS_GENEVE_POOL_TAG
);
48 OVS_LOG_ERROR("Insufficient memory, can't allocate GENEVE_VPORT");
49 return STATUS_INSUFFICIENT_RESOURCES
;
52 RtlZeroMemory(genevePort
, sizeof(*genevePort
));
53 genevePort
->dstPort
= udpDestPort
;
54 vport
->priv
= (PVOID
) genevePort
;
55 return STATUS_SUCCESS
;
59 OvsCleanupGeneveTunnel(POVS_VPORT_ENTRY vport
)
61 if (vport
->ovsType
!= OVS_VPORT_TYPE_GENEVE
||
62 vport
->priv
== NULL
) {
66 OvsFreeMemoryWithTag(vport
->priv
, OVS_GENEVE_POOL_TAG
);
70 NDIS_STATUS
OvsEncapGeneve(POVS_VPORT_ENTRY vport
,
71 PNET_BUFFER_LIST curNbl
,
72 OvsIPv4TunnelKey
*tunKey
,
73 POVS_SWITCH_CONTEXT switchContext
,
74 POVS_PACKET_HDR_INFO layers
,
75 PNET_BUFFER_LIST
*newNbl
,
76 POVS_FWD_INFO switchFwdInfo
)
87 GeneveOptionHdr
*optHdr
;
88 POVS_GENEVE_VPORT vportGeneve
;
89 UINT32 headRoom
= OvsGetGeneveTunHdrMinSize() + tunKey
->tunOptLen
;
92 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo
;
94 status
= OvsLookupIPFwdInfo(tunKey
->src
, tunKey
->dst
, &fwdInfo
);
95 if (status
!= STATUS_SUCCESS
) {
96 OvsFwdIPHelperRequest(NULL
, 0, tunKey
, NULL
, NULL
, NULL
);
97 // return NDIS_STATUS_PENDING;
99 * XXX: Don't know if the completionList will make any sense when
100 * accessed in the callback. Make sure the caveats are known.
102 * XXX: This code will work once we are able to grab locks in the
105 return NDIS_STATUS_FAILURE
;
108 RtlCopyMemory(switchFwdInfo
->value
, fwdInfo
.value
, sizeof fwdInfo
.value
);
110 curNb
= NET_BUFFER_LIST_FIRST_NB(curNbl
);
111 packetLength
= NET_BUFFER_DATA_LENGTH(curNb
);
114 mss
= OVSGetTcpMSS(curNbl
);
116 OVS_LOG_TRACE("MSS %u packet len %u", mss
,
119 OVS_LOG_TRACE("l4Offset %d", layers
->l4Offset
);
120 *newNbl
= OvsTcpSegmentNBL(switchContext
, curNbl
, layers
,
121 mss
, headRoom
, FALSE
);
122 if (*newNbl
== NULL
) {
123 OVS_LOG_ERROR("Unable to segment NBL");
124 return NDIS_STATUS_FAILURE
;
126 /* Clear out LSO flags after this point */
127 NET_BUFFER_LIST_INFO(*newNbl
, TcpLargeSendNetBufferListInfo
) = 0;
131 vportGeneve
= (POVS_GENEVE_VPORT
) GetOvsVportPriv(vport
);
132 ASSERT(vportGeneve
!= NULL
);
134 /* If we didn't split the packet above, make a copy now */
135 if (*newNbl
== NULL
) {
136 *newNbl
= OvsPartialCopyNBL(switchContext
, curNbl
, 0, headRoom
,
138 if (*newNbl
== NULL
) {
139 OVS_LOG_ERROR("Unable to copy NBL");
140 return NDIS_STATUS_FAILURE
;
142 csumInfo
.Value
= NET_BUFFER_LIST_INFO(curNbl
,
143 TcpIpChecksumNetBufferListInfo
);
144 status
= OvsApplySWChecksumOnNB(layers
, *newNbl
, &csumInfo
);
146 if (status
!= NDIS_STATUS_SUCCESS
) {
152 for (curNb
= NET_BUFFER_LIST_FIRST_NB(curNbl
); curNb
!= NULL
;
153 curNb
= curNb
->Next
) {
154 status
= NdisRetreatNetBufferDataStart(curNb
, headRoom
, 0, NULL
);
155 if (status
!= NDIS_STATUS_SUCCESS
) {
159 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
160 bufferStart
= (PUINT8
)MmGetSystemAddressForMdlSafe(curMdl
,
163 status
= NDIS_STATUS_RESOURCES
;
167 bufferStart
+= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
168 if (NET_BUFFER_NEXT_NB(curNb
)) {
169 OVS_LOG_TRACE("nb length %u next %u",
170 NET_BUFFER_DATA_LENGTH(curNb
),
171 NET_BUFFER_DATA_LENGTH(curNb
->Next
));
175 ethHdr
= (EthHdr
*)bufferStart
;
176 NdisMoveMemory(ethHdr
->Destination
, fwdInfo
.dstMacAddr
,
177 sizeof ethHdr
->Destination
);
178 NdisMoveMemory(ethHdr
->Source
, fwdInfo
.srcMacAddr
,
179 sizeof ethHdr
->Source
);
180 ethHdr
->Type
= htons(ETH_TYPE_IPV4
);
183 ipHdr
= (IPHdr
*)((PCHAR
)ethHdr
+ sizeof *ethHdr
);
185 ipHdr
->ihl
= sizeof *ipHdr
/ 4;
186 ipHdr
->version
= IPPROTO_IPV4
;
187 ipHdr
->tos
= tunKey
->tos
;
188 ipHdr
->tot_len
= htons(NET_BUFFER_DATA_LENGTH(curNb
) - sizeof *ethHdr
);
189 ipHdr
->id
= (uint16
)atomic_add64(&vportGeneve
->ipId
,
190 NET_BUFFER_DATA_LENGTH(curNb
));
191 ipHdr
->frag_off
= (tunKey
->flags
& OVS_TNL_F_DONT_FRAGMENT
) ?
193 ipHdr
->ttl
= tunKey
->ttl
? tunKey
->ttl
: GENEVE_DEFAULT_TTL
;
194 ipHdr
->protocol
= IPPROTO_UDP
;
195 ASSERT(tunKey
->dst
== fwdInfo
.dstIpAddr
);
196 ASSERT(tunKey
->src
== fwdInfo
.srcIpAddr
|| tunKey
->src
== 0);
197 ipHdr
->saddr
= fwdInfo
.srcIpAddr
;
198 ipHdr
->daddr
= fwdInfo
.dstIpAddr
;
202 udpHdr
= (UDPHdr
*)((PCHAR
)ipHdr
+ sizeof *ipHdr
);
203 udpHdr
->source
= htons(tunKey
->flow_hash
| MAXINT16
);
204 udpHdr
->dest
= tunKey
->dst_port
? tunKey
->dst_port
:
205 htons(vportGeneve
->dstPort
);
206 udpHdr
->len
= htons(NET_BUFFER_DATA_LENGTH(curNb
) - headRoom
+
207 sizeof *udpHdr
+ sizeof *geneveHdr
+
209 if (tunKey
->flags
& OVS_TNL_F_CSUM
) {
210 UINT16 udpChksumLen
= (UINT16
) NET_BUFFER_DATA_LENGTH(curNb
) -
211 sizeof *ipHdr
- sizeof *ethHdr
;
212 udpHdr
->check
= IPPseudoChecksum(&ipHdr
->saddr
, &ipHdr
->daddr
,
213 IPPROTO_UDP
, udpChksumLen
);
218 geneveHdr
= (GeneveHdr
*)((PCHAR
)udpHdr
+ sizeof *udpHdr
);
219 geneveHdr
->version
= GENEVE_VER
;
220 geneveHdr
->optLen
= tunKey
->tunOptLen
/ 4;
221 geneveHdr
->oam
= !!(tunKey
->flags
& OVS_TNL_F_OAM
);
222 geneveHdr
->critical
= !!(tunKey
->flags
& OVS_TNL_F_CRT_OPT
);
223 geneveHdr
->reserved1
= 0;
224 geneveHdr
->protocol
= ETH_P_TEB_NBO
;
225 geneveHdr
->vni
= GENEVE_TUNNELID_TO_VNI(tunKey
->tunnelId
);
226 geneveHdr
->reserved2
= 0;
228 /* Geneve header options */
229 optHdr
= (GeneveOptionHdr
*)(geneveHdr
+ 1);
230 memcpy(optHdr
, TunnelKeyGetOptions(tunKey
), tunKey
->tunOptLen
);
233 csumInfo
.Transmit
.IpHeaderChecksum
= 1;
234 csumInfo
.Transmit
.IsIPv4
= 1;
235 if (tunKey
->flags
& OVS_TNL_F_CSUM
) {
236 csumInfo
.Transmit
.UdpChecksum
= 1;
238 NET_BUFFER_LIST_INFO(curNbl
,
239 TcpIpChecksumNetBufferListInfo
) = csumInfo
.Value
;
241 return STATUS_SUCCESS
;
244 OvsCompleteNBL(switchContext
, *newNbl
, TRUE
);
249 NDIS_STATUS
OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext
,
250 PNET_BUFFER_LIST curNbl
,
251 OvsIPv4TunnelKey
*tunKey
,
252 PNET_BUFFER_LIST
*newNbl
)
259 GeneveHdr
*geneveHdr
;
265 OVS_PACKET_HDR_INFO layers
= { 0 };
267 status
= OvsExtractLayers(curNbl
, &layers
);
268 if (status
!= NDIS_STATUS_SUCCESS
) {
272 /* Check the length of the UDP payload */
273 curNb
= NET_BUFFER_LIST_FIRST_NB(curNbl
);
274 tunnelSize
= OvsGetGeneveTunHdrSizeFromLayers(&layers
);
275 packetLength
= NET_BUFFER_DATA_LENGTH(curNb
);
276 if (packetLength
<= tunnelSize
) {
277 return NDIS_STATUS_INVALID_LENGTH
;
281 * Create a copy of the NBL so that we have all the headers in one MDL.
283 *newNbl
= OvsPartialCopyNBL(switchContext
, curNbl
,
285 TRUE
/*copy NBL info */);
287 if (*newNbl
== NULL
) {
288 return NDIS_STATUS_RESOURCES
;
291 /* XXX: Handle VLAN header. */
293 curNb
= NET_BUFFER_LIST_FIRST_NB(curNbl
);
294 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
295 bufferStart
= (PUINT8
)MmGetSystemAddressForMdlSafe(curMdl
, LowPagePriority
)
296 + NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
298 status
= NDIS_STATUS_RESOURCES
;
302 ethHdr
= (EthHdr
*)bufferStart
;
303 /* XXX: Handle IP options. */
304 ipHdr
= (IPHdr
*)(bufferStart
+ layers
.l3Offset
);
305 tunKey
->src
= ipHdr
->saddr
;
306 tunKey
->dst
= ipHdr
->daddr
;
307 tunKey
->tos
= ipHdr
->tos
;
308 tunKey
->ttl
= ipHdr
->ttl
;
310 udpHdr
= (UDPHdr
*)(bufferStart
+ layers
.l4Offset
);
312 /* Validate if NIC has indicated checksum failure. */
313 status
= OvsValidateUDPChecksum(curNbl
, udpHdr
->check
== 0);
314 if (status
!= NDIS_STATUS_SUCCESS
) {
318 /* Calculate and verify UDP checksum if NIC didn't do it. */
319 if (udpHdr
->check
!= 0) {
320 status
= OvsCalculateUDPChecksum(curNbl
, curNb
, ipHdr
, udpHdr
,
321 packetLength
, &layers
);
322 tunKey
->flags
|= OVS_TNL_F_CSUM
;
323 if (status
!= NDIS_STATUS_SUCCESS
) {
328 geneveHdr
= (GeneveHdr
*)((PCHAR
)udpHdr
+ sizeof *udpHdr
);
329 if (geneveHdr
->protocol
!= ETH_P_TEB_NBO
) {
330 status
= STATUS_NDIS_INVALID_PACKET
;
333 /* Update tunnelKey flags. */
334 tunKey
->flags
= OVS_TNL_F_KEY
| (geneveHdr
->oam
? OVS_TNL_F_OAM
: 0) |
335 (geneveHdr
->critical
? OVS_TNL_F_CRT_OPT
: 0);
337 tunKey
->tunnelId
= GENEVE_VNI_TO_TUNNELID(geneveHdr
->vni
);
338 tunKey
->tunOptLen
= (uint8
)geneveHdr
->optLen
* 4;
339 if (tunKey
->tunOptLen
> TUN_OPT_MAX_LEN
||
340 packetLength
< tunnelSize
+ tunKey
->tunOptLen
) {
341 status
= NDIS_STATUS_INVALID_LENGTH
;
344 /* Clear out the receive flag for the inner packet. */
345 NET_BUFFER_LIST_INFO(curNbl
, TcpIpChecksumNetBufferListInfo
) = 0;
347 NdisAdvanceNetBufferDataStart(curNb
, tunnelSize
, FALSE
, NULL
);
348 if (tunKey
->tunOptLen
> 0) {
349 optStart
= NdisGetDataBuffer(curNb
, tunKey
->tunOptLen
,
350 TunnelKeyGetOptions(tunKey
), 1, 0);
352 /* If data is contiguous in the buffer, NdisGetDataBuffer will not copy
353 data to the storage. Manual copy is needed. */
354 if (optStart
!= TunnelKeyGetOptions(tunKey
)) {
355 memcpy(TunnelKeyGetOptions(tunKey
), optStart
, tunKey
->tunOptLen
);
357 NdisAdvanceNetBufferDataStart(curNb
, tunKey
->tunOptLen
, FALSE
, NULL
);
358 tunKey
->flags
|= OVS_TNL_F_GENEVE_OPT
;
361 return NDIS_STATUS_SUCCESS
;
364 OvsCompleteNBL(switchContext
, *newNbl
, TRUE
);