2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "OvsSwitch.h"
23 #include "OvsNetProto.h"
26 #include "OvsChecksum.h"
27 #include "OvsPacketIO.h"
32 #define OVS_DBG_MOD OVS_DBG_ACTION
35 typedef struct _OVS_ACTION_STATS
{
41 UINT32 failedFlowMiss
;
43 UINT32 failedFlowExtract
;
48 UINT32 cannotGrowDest
;
50 UINT32 failedChecksum
;
51 } OVS_ACTION_STATS
, *POVS_ACTION_STATS
;
53 OVS_ACTION_STATS ovsActionStats
;
56 * There a lot of data that needs to be maintained while executing the pipeline
57 * as dictated by the actions of a flow, across different functions at different
58 * levels. Such data is put together in a 'context' structure. Care should be
59 * exercised while adding new members to the structure - only add ones that get
60 * used across multiple stages in the pipeline/get used in multiple functions.
62 #define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
63 typedef struct OvsForwardingContext
{
64 POVS_SWITCH_CONTEXT switchContext
;
65 /* The NBL currently used in the pipeline. */
66 PNET_BUFFER_LIST curNbl
;
67 /* NDIS forwarding detail for 'curNbl'. */
68 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail
;
69 /* Array of destination ports for 'curNbl'. */
70 PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts
;
71 /* send flags while sending 'curNbl' into NDIS. */
73 /* Total number of output ports, used + unused, in 'curNbl'. */
74 UINT32 destPortsSizeIn
;
75 /* Total number of used output ports in 'curNbl'. */
76 UINT32 destPortsSizeOut
;
78 * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
81 OvsCompletionList
*completionList
;
83 * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
84 * bridge. ie. during tunneling on the Rx side.
90 * - specified in actions during tunneling Tx
91 * - extracted from an NBL during tunneling Rx
93 OvsIPv4TunnelKey tunKey
;
97 * To store the output port, when it is a tunneled port. We don't foresee
98 * multiple tunneled ports as outport for any given NBL.
100 POVS_VPORT_ENTRY tunnelTxNic
;
104 * Points to the Internal port on the PIF Bridge, if the packet needs to be
107 POVS_VPORT_ENTRY tunnelRxNic
;
109 /* header information */
110 OVS_PACKET_HDR_INFO layers
;
111 } OvsForwardingContext
;
115 * --------------------------------------------------------------------------
116 * OvsInitForwardingCtx --
117 * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
121 * NDIS_STATUS_SUCCESS on success
122 * Other NDIS_STATUS upon failure. Upon failure, it is safe to call
123 * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
124 * enough for OvsCompleteNBLForwardingCtx() to do its work.
125 * --------------------------------------------------------------------------
127 static __inline NDIS_STATUS
128 OvsInitForwardingCtx(OvsForwardingContext
*ovsFwdCtx
,
129 POVS_SWITCH_CONTEXT switchContext
,
130 PNET_BUFFER_LIST curNbl
,
133 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail
,
134 OvsCompletionList
*completionList
,
135 OVS_PACKET_HDR_INFO
*layers
,
136 BOOLEAN resetTunnelInfo
)
139 ASSERT(switchContext
);
144 * Set values for curNbl and switchContext so upon failures, we have enough
145 * information to do cleanup.
147 ovsFwdCtx
->curNbl
= curNbl
;
148 ovsFwdCtx
->switchContext
= switchContext
;
149 ovsFwdCtx
->completionList
= completionList
;
150 ovsFwdCtx
->fwdDetail
= fwdDetail
;
152 if (fwdDetail
->NumAvailableDestinations
> 0) {
154 * XXX: even though MSDN says GetNetBufferListDestinations() returns
155 * NDIS_STATUS, the header files say otherwise.
157 switchContext
->NdisSwitchHandlers
.GetNetBufferListDestinations(
158 switchContext
->NdisSwitchContext
, curNbl
,
159 &ovsFwdCtx
->destinationPorts
);
161 ASSERT(ovsFwdCtx
->destinationPorts
);
162 /* Ensure that none of the elements are consumed yet. */
163 ASSERT(ovsFwdCtx
->destinationPorts
->NumElements
==
164 fwdDetail
->NumAvailableDestinations
);
166 ovsFwdCtx
->destinationPorts
= NULL
;
168 ovsFwdCtx
->destPortsSizeIn
= fwdDetail
->NumAvailableDestinations
;
169 ovsFwdCtx
->destPortsSizeOut
= 0;
170 ovsFwdCtx
->srcVportNo
= srcVportNo
;
171 ovsFwdCtx
->sendFlags
= sendFlags
;
173 ovsFwdCtx
->layers
= *layers
;
175 RtlZeroMemory(&ovsFwdCtx
->layers
, sizeof ovsFwdCtx
->layers
);
177 if (resetTunnelInfo
) {
178 ovsFwdCtx
->tunnelTxNic
= NULL
;
179 ovsFwdCtx
->tunnelRxNic
= NULL
;
180 RtlZeroMemory(&ovsFwdCtx
->tunKey
, sizeof ovsFwdCtx
->tunKey
);
183 return NDIS_STATUS_SUCCESS
;
187 * --------------------------------------------------------------------------
188 * OvsDetectTunnelRxPkt --
189 * Utility function for an RX packet to detect its tunnel type.
192 * True - if the tunnel type was detected.
193 * False - if not a tunnel packet or tunnel type not supported.
194 * --------------------------------------------------------------------------
196 static __inline BOOLEAN
197 OvsDetectTunnelRxPkt(OvsForwardingContext
*ovsFwdCtx
,
198 const OvsFlowKey
*flowKey
)
200 POVS_VPORT_ENTRY tunnelVport
= NULL
;
202 /* XXX: we should also check for the length of the UDP payload to pick
203 * packets only if they are at least VXLAN header size.
205 if (!flowKey
->ipKey
.nwFrag
&&
206 flowKey
->ipKey
.nwProto
== IPPROTO_UDP
&&
207 flowKey
->ipKey
.l4
.tpDst
== VXLAN_UDP_PORT_NBO
) {
208 tunnelVport
= OvsGetTunnelVport(OVSWIN_VPORT_TYPE_VXLAN
);
209 ovsActionStats
.rxVxlan
++;
212 // We might get tunnel packets even before the tunnel gets initialized.
214 ASSERT(ovsFwdCtx
->tunnelRxNic
== NULL
);
215 ovsFwdCtx
->tunnelRxNic
= tunnelVport
;
223 * --------------------------------------------------------------------------
224 * OvsDetectTunnelPkt --
225 * Utility function to detect if a packet is to be subjected to
226 * tunneling (Tx) or de-tunneling (Rx). Various factors such as source
227 * port, destination port, packet contents, and previously setup tunnel
231 * True - If the packet is to be subjected to tunneling.
232 * In case of invalid tunnel context, the tunneling functionality is
233 * a no-op and is completed within this function itself by consuming
234 * all of the tunneling context.
235 * False - If not a tunnel packet or tunnel type not supported. Caller should
236 * process the packet as a non-tunnel packet.
237 * --------------------------------------------------------------------------
239 static __inline BOOLEAN
240 OvsDetectTunnelPkt(OvsForwardingContext
*ovsFwdCtx
,
241 const POVS_VPORT_ENTRY dstVport
,
242 const OvsFlowKey
*flowKey
)
244 if (OvsIsInternalVportType(dstVport
->ovsType
)) {
247 * The source of NBL during tunneling Rx could be the external
248 * port or if it is being executed from userspace, the source port is
251 BOOLEAN validSrcPort
= (ovsFwdCtx
->fwdDetail
->SourcePortId
==
252 ovsFwdCtx
->switchContext
->externalPortId
) ||
253 (ovsFwdCtx
->fwdDetail
->SourcePortId
==
254 NDIS_SWITCH_DEFAULT_PORT_ID
);
256 if (validSrcPort
&& OvsDetectTunnelRxPkt(ovsFwdCtx
, flowKey
)) {
257 ASSERT(ovsFwdCtx
->tunnelTxNic
== NULL
);
258 ASSERT(ovsFwdCtx
->tunnelRxNic
!= NULL
);
261 } else if (OvsIsTunnelVportType(dstVport
->ovsType
)) {
262 ASSERT(ovsFwdCtx
->tunnelTxNic
== NULL
);
263 ASSERT(ovsFwdCtx
->tunnelRxNic
== NULL
);
267 * The destination port is a tunnel port. Encapsulation must be
268 * performed only on packets that originate from a VIF port or from
269 * userspace (default port)
271 * If the packet will not be encapsulated, consume the tunnel context
274 if (ovsFwdCtx
->srcVportNo
!= OVS_DEFAULT_PORT_NO
&&
275 !OvsIsVifVportNo(ovsFwdCtx
->srcVportNo
)) {
276 ovsFwdCtx
->tunKey
.dst
= 0;
279 /* Tunnel the packet only if tunnel context is set. */
280 if (ovsFwdCtx
->tunKey
.dst
!= 0) {
281 ovsActionStats
.txVxlan
++;
282 ovsFwdCtx
->tunnelTxNic
= dstVport
;
293 * --------------------------------------------------------------------------
295 * Add the specified destination vport into the forwarding context. If the
296 * vport is a VIF/external port, it is added directly to the NBL. If it is
297 * a tunneling port, it is NOT added to the NBL.
300 * NDIS_STATUS_SUCCESS on success
301 * Other NDIS_STATUS upon failure.
302 * --------------------------------------------------------------------------
304 static __inline NDIS_STATUS
305 OvsAddPorts(OvsForwardingContext
*ovsFwdCtx
,
307 NDIS_SWITCH_PORT_ID dstPortId
,
308 BOOLEAN preserveVLAN
,
309 BOOLEAN preservePriority
)
311 POVS_VPORT_ENTRY vport
;
312 PNDIS_SWITCH_PORT_DESTINATION fwdPort
;
314 POVS_SWITCH_CONTEXT switchContext
= ovsFwdCtx
->switchContext
;
317 * We hold the dispatch lock that protects the list of vports, so vports
318 * validated here can be added as destinations safely before we call into
321 * Some of the vports can be tunnelled ports as well in which case
322 * they should be added to a separate list of tunnelled destination ports
323 * instead of the VIF ports. The context for the tunnel is settable
324 * in OvsForwardingContext.
326 vport
= OvsFindVportByPortNo(ovsFwdCtx
->switchContext
, dstPortId
);
327 if (vport
== NULL
|| vport
->ovsState
!= OVS_STATE_CONNECTED
) {
329 * There may be some latency between a port disappearing, and userspace
330 * updating the recalculated flows. In the meantime, handle invalid
333 ovsActionStats
.noVport
++;
334 return NDIS_STATUS_SUCCESS
;
336 ASSERT(vport
->nicState
== NdisSwitchNicStateConnected
);
337 vport
->stats
.txPackets
++;
338 vport
->stats
.txBytes
+=
339 NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
));
341 if (OvsDetectTunnelPkt(ovsFwdCtx
, vport
, flowKey
)) {
342 return NDIS_STATUS_SUCCESS
;
345 if (ovsFwdCtx
->destPortsSizeOut
== ovsFwdCtx
->destPortsSizeIn
) {
346 if (ovsFwdCtx
->destPortsSizeIn
== 0) {
347 ASSERT(ovsFwdCtx
->destinationPorts
== NULL
);
348 ASSERT(ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
== 0);
350 switchContext
->NdisSwitchHandlers
.GrowNetBufferListDestinations(
351 switchContext
->NdisSwitchContext
, ovsFwdCtx
->curNbl
,
352 OVS_DEST_PORTS_ARRAY_MIN_SIZE
,
353 &ovsFwdCtx
->destinationPorts
);
354 if (status
!= NDIS_STATUS_SUCCESS
) {
355 ovsActionStats
.cannotGrowDest
++;
358 ovsFwdCtx
->destPortsSizeIn
=
359 ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
;
360 ASSERT(ovsFwdCtx
->destinationPorts
);
362 ASSERT(ovsFwdCtx
->destinationPorts
!= NULL
);
365 * A ULONG value that specifies the total number of
366 * NDIS_SWITCH_PORT_DESTINATION elements in the
367 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
370 * A ULONG value that specifies the number of
371 * NDIS_SWITCH_PORT_DESTINATION elements in the
372 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
373 * specify port destinations.
375 * NumAvailableDestinations:
376 * A value that specifies the number of unused extensible switch
377 * destination ports elements within an NET_BUFFER_LIST structure.
379 ASSERT(ovsFwdCtx
->destinationPorts
->NumElements
==
380 ovsFwdCtx
->destPortsSizeIn
);
381 ASSERT(ovsFwdCtx
->destinationPorts
->NumDestinations
==
382 ovsFwdCtx
->destPortsSizeOut
-
383 ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
);
384 ASSERT(ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
> 0);
386 * Before we grow the array of destination ports, the current set
387 * of ports needs to be committed. Only the ports added since the
388 * last commit need to be part of the new update.
390 status
= switchContext
->NdisSwitchHandlers
.UpdateNetBufferListDestinations(
391 switchContext
->NdisSwitchContext
, ovsFwdCtx
->curNbl
,
392 ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
,
393 ovsFwdCtx
->destinationPorts
);
394 if (status
!= NDIS_STATUS_SUCCESS
) {
395 ovsActionStats
.cannotGrowDest
++;
398 ASSERT(ovsFwdCtx
->destinationPorts
->NumElements
==
399 ovsFwdCtx
->destPortsSizeIn
);
400 ASSERT(ovsFwdCtx
->destinationPorts
->NumDestinations
==
401 ovsFwdCtx
->destPortsSizeOut
);
402 ASSERT(ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
== 0);
404 status
= switchContext
->NdisSwitchHandlers
.GrowNetBufferListDestinations(
405 switchContext
->NdisSwitchContext
, ovsFwdCtx
->curNbl
,
406 ovsFwdCtx
->destPortsSizeIn
, &ovsFwdCtx
->destinationPorts
);
407 if (status
!= NDIS_STATUS_SUCCESS
) {
408 ovsActionStats
.cannotGrowDest
++;
411 ASSERT(ovsFwdCtx
->destinationPorts
!= NULL
);
412 ovsFwdCtx
->destPortsSizeIn
<<= 1;
416 ASSERT(ovsFwdCtx
->destPortsSizeOut
< ovsFwdCtx
->destPortsSizeIn
);
418 NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx
->destinationPorts
,
419 ovsFwdCtx
->destPortsSizeOut
);
421 fwdPort
->PortId
= vport
->portId
;
422 fwdPort
->NicIndex
= vport
->nicIndex
;
423 fwdPort
->IsExcluded
= 0;
424 fwdPort
->PreserveVLAN
= preserveVLAN
;
425 fwdPort
->PreservePriority
= preservePriority
;
426 ovsFwdCtx
->destPortsSizeOut
+= 1;
428 return NDIS_STATUS_SUCCESS
;
433 * --------------------------------------------------------------------------
434 * OvsClearTunTxCtx --
435 * Utility function to clear tx tunneling context.
436 * --------------------------------------------------------------------------
439 OvsClearTunTxCtx(OvsForwardingContext
*ovsFwdCtx
)
441 ovsFwdCtx
->tunnelTxNic
= NULL
;
442 ovsFwdCtx
->tunKey
.dst
= 0;
447 * --------------------------------------------------------------------------
448 * OvsClearTunRxCtx --
449 * Utility function to clear rx tunneling context.
450 * --------------------------------------------------------------------------
453 OvsClearTunRxCtx(OvsForwardingContext
*ovsFwdCtx
)
455 ovsFwdCtx
->tunnelRxNic
= NULL
;
456 ovsFwdCtx
->tunKey
.dst
= 0;
461 * --------------------------------------------------------------------------
462 * OvsCompleteNBLForwardingCtx --
463 * This utility function is responsible for freeing/completing an NBL - either
464 * by adding it to a completion list or by freeing it.
467 * It also resets the necessary fields in 'ovsFwdCtx'.
468 * --------------------------------------------------------------------------
471 OvsCompleteNBLForwardingCtx(OvsForwardingContext
*ovsFwdCtx
,
474 NDIS_STRING filterReason
;
476 RtlInitUnicodeString(&filterReason
, dropReason
);
477 if (ovsFwdCtx
->completionList
) {
478 OvsAddPktCompletionList(ovsFwdCtx
->completionList
, TRUE
,
479 ovsFwdCtx
->fwdDetail
->SourcePortId
, ovsFwdCtx
->curNbl
, 1,
481 ovsFwdCtx
->curNbl
= NULL
;
483 /* If there is no completionList, we assume this is ovs created NBL */
484 ovsFwdCtx
->curNbl
= OvsCompleteNBL(ovsFwdCtx
->switchContext
,
485 ovsFwdCtx
->curNbl
, TRUE
);
486 ASSERT(ovsFwdCtx
->curNbl
== NULL
);
488 /* XXX: these can be made debug only to save cycles. Ideally the pipeline
489 * using these fields should reset the values at the end of the pipeline. */
490 ovsFwdCtx
->destPortsSizeOut
= 0;
491 ovsFwdCtx
->tunnelTxNic
= NULL
;
492 ovsFwdCtx
->tunnelRxNic
= NULL
;
496 * --------------------------------------------------------------------------
497 * OvsDoFlowLookupOutput --
498 * Function to be used for the second stage of a tunneling workflow, ie.:
499 * - On the encapsulated packet on Tx path, to do a flow extract, flow
500 * lookup and excuting the actions.
501 * - On the decapsulated packet on Rx path, to do a flow extract, flow
502 * lookup and excuting the actions.
504 * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
505 * until the new buffer management framework is adopted.
508 * The NBL in 'ovsFwdCtx' is consumed.
509 * --------------------------------------------------------------------------
511 static __inline NDIS_STATUS
512 OvsDoFlowLookupOutput(OvsForwardingContext
*ovsFwdCtx
)
518 POVS_VPORT_ENTRY vport
=
519 OvsFindVportByPortNo(ovsFwdCtx
->switchContext
, ovsFwdCtx
->srcVportNo
);
520 if (vport
== NULL
|| vport
->ovsState
!= OVS_STATE_CONNECTED
) {
521 ASSERT(FALSE
); // XXX: let's catch this for now
522 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
523 L
"OVS-Dropped due to internal/tunnel port removal");
524 ovsActionStats
.noVport
++;
525 return NDIS_STATUS_SUCCESS
;
527 ASSERT(vport
->nicState
== NdisSwitchNicStateConnected
);
529 /* Assert that in the Rx direction, key is always setup. */
530 ASSERT(ovsFwdCtx
->tunnelRxNic
== NULL
|| ovsFwdCtx
->tunKey
.dst
!= 0);
531 status
= OvsExtractFlow(ovsFwdCtx
->curNbl
, ovsFwdCtx
->srcVportNo
,
532 &key
, &ovsFwdCtx
->layers
, ovsFwdCtx
->tunKey
.dst
!= 0 ?
533 &ovsFwdCtx
->tunKey
: NULL
);
534 if (status
!= NDIS_STATUS_SUCCESS
) {
535 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
536 L
"OVS-Flow extract failed");
537 ovsActionStats
.failedFlowExtract
++;
541 flow
= OvsLookupFlow(&ovsFwdCtx
->switchContext
->datapath
, &key
, &hash
, FALSE
);
543 OvsFlowUsed(flow
, ovsFwdCtx
->curNbl
, &ovsFwdCtx
->layers
);
544 ovsFwdCtx
->switchContext
->datapath
.hits
++;
545 status
= OvsActionsExecute(ovsFwdCtx
->switchContext
,
546 ovsFwdCtx
->completionList
, ovsFwdCtx
->curNbl
,
547 ovsFwdCtx
->srcVportNo
, ovsFwdCtx
->sendFlags
,
548 &key
, &hash
, &ovsFwdCtx
->layers
,
549 flow
->actions
, flow
->actionsLen
);
550 ovsFwdCtx
->curNbl
= NULL
;
552 LIST_ENTRY missedPackets
;
554 ovsFwdCtx
->switchContext
->datapath
.misses
++;
555 InitializeListHead(&missedPackets
);
556 status
= OvsCreateAndAddPackets(
557 OVS_DEFAULT_PACKET_QUEUE
, NULL
, 0, OVS_PACKET_CMD_MISS
,
558 ovsFwdCtx
->srcVportNo
,
559 key
.tunKey
.dst
!= 0 ?
560 (OvsIPv4TunnelKey
*)&key
.tunKey
: NULL
,
562 ovsFwdCtx
->tunnelRxNic
!= NULL
, &ovsFwdCtx
->layers
,
563 ovsFwdCtx
->switchContext
, &missedPackets
, &num
);
565 OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE
, &missedPackets
, num
);
567 if (status
== NDIS_STATUS_SUCCESS
) {
568 /* Complete the packet since it was copied to user buffer. */
569 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
570 L
"OVS-Dropped since packet was copied to userspace");
571 ovsActionStats
.flowMiss
++;
572 status
= NDIS_STATUS_SUCCESS
;
574 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
575 L
"OVS-Dropped due to failure to queue to userspace");
576 status
= NDIS_STATUS_FAILURE
;
577 ovsActionStats
.failedFlowMiss
++;
585 * --------------------------------------------------------------------------
587 * The start function for Tx tunneling - encapsulates the packet, and
588 * outputs the packet on the PIF bridge.
591 * The NBL in 'ovsFwdCtx' is consumed.
592 * --------------------------------------------------------------------------
594 static __inline NDIS_STATUS
595 OvsTunnelPortTx(OvsForwardingContext
*ovsFwdCtx
)
597 NDIS_STATUS status
= NDIS_STATUS_FAILURE
;
598 PNET_BUFFER_LIST newNbl
= NULL
;
601 * Setup the source port to be the internal port to as to facilitate the
602 * second OvsLookupFlow.
604 if (ovsFwdCtx
->switchContext
->internalVport
== NULL
) {
605 OvsClearTunTxCtx(ovsFwdCtx
);
606 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
607 L
"OVS-Dropped since internal port is absent");
608 return NDIS_STATUS_FAILURE
;
610 ovsFwdCtx
->srcVportNo
=
611 ((POVS_VPORT_ENTRY
)ovsFwdCtx
->switchContext
->internalVport
)->portNo
;
613 ovsFwdCtx
->fwdDetail
->SourcePortId
= ovsFwdCtx
->switchContext
->internalPortId
;
614 ovsFwdCtx
->fwdDetail
->SourceNicIndex
=
615 ((POVS_VPORT_ENTRY
)ovsFwdCtx
->switchContext
->internalVport
)->nicIndex
;
617 /* Do the encap. Encap function does not consume the NBL. */
618 switch(ovsFwdCtx
->tunnelTxNic
->ovsType
) {
619 case OVSWIN_VPORT_TYPE_VXLAN
:
620 status
= OvsEncapVxlan(ovsFwdCtx
->curNbl
, &ovsFwdCtx
->tunKey
,
621 ovsFwdCtx
->switchContext
,
622 (VOID
*)ovsFwdCtx
->completionList
,
623 &ovsFwdCtx
->layers
, &newNbl
);
626 ASSERT(! "Tx: Unhandled tunnel type");
629 /* Reset the tunnel context so that it doesn't get used after this point. */
630 OvsClearTunTxCtx(ovsFwdCtx
);
632 if (status
== NDIS_STATUS_SUCCESS
) {
634 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
635 L
"Complete after cloning NBL for encapsulation");
636 ovsFwdCtx
->curNbl
= newNbl
;
637 status
= OvsDoFlowLookupOutput(ovsFwdCtx
);
638 ASSERT(ovsFwdCtx
->curNbl
== NULL
);
641 * XXX: Temporary freeing of the packet until we register a
642 * callback to IP helper.
644 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
645 L
"OVS-Dropped due to encap failure");
646 ovsActionStats
.failedEncap
++;
647 status
= NDIS_STATUS_SUCCESS
;
654 * --------------------------------------------------------------------------
656 * Decapsulate the incoming NBL based on the tunnel type and goes through
657 * the flow lookup for the inner packet.
659 * Note: IP checksum is validate here, but L4 checksum validation needs
660 * to be done by the corresponding tunnel types.
663 * The NBL in 'ovsFwdCtx' is consumed.
664 * --------------------------------------------------------------------------
666 static __inline NDIS_STATUS
667 OvsTunnelPortRx(OvsForwardingContext
*ovsFwdCtx
)
669 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
670 PNET_BUFFER_LIST newNbl
= NULL
;
671 POVS_VPORT_ENTRY tunnelRxVport
= ovsFwdCtx
->tunnelRxNic
;
673 if (OvsValidateIPChecksum(ovsFwdCtx
->curNbl
, &ovsFwdCtx
->layers
)
674 != NDIS_STATUS_SUCCESS
) {
675 ovsActionStats
.failedChecksum
++;
676 OVS_LOG_INFO("Packet dropped due to IP checksum failure.");
680 switch(tunnelRxVport
->ovsType
) {
681 case OVSWIN_VPORT_TYPE_VXLAN
:
683 * OvsDoDecapVxlan should return a new NBL if it was copied, and
684 * this new NBL should be setup as the ovsFwdCtx->curNbl.
686 status
= OvsDoDecapVxlan(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
687 &ovsFwdCtx
->tunKey
, &newNbl
);
690 OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
691 tunnelRxVport
->ovsType
);
692 ASSERT(! "Rx: Unhandled tunnel type");
693 status
= NDIS_STATUS_NOT_SUPPORTED
;
696 if (status
!= NDIS_STATUS_SUCCESS
) {
697 ovsActionStats
.failedDecap
++;
702 * tunnelRxNic and other fields will be cleared, re-init the context
705 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
706 L
"OVS-dropped due to new decap packet");
708 /* Decapsulated packet is in a new NBL */
709 ovsFwdCtx
->tunnelRxNic
= tunnelRxVport
;
710 OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
711 newNbl
, tunnelRxVport
->portNo
, 0,
712 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
713 ovsFwdCtx
->completionList
,
714 &ovsFwdCtx
->layers
, FALSE
);
717 * Set the NBL's SourcePortId and SourceNicIndex to default values to
718 * keep NDIS happy when we forward the packet.
720 ovsFwdCtx
->fwdDetail
->SourcePortId
= NDIS_SWITCH_DEFAULT_PORT_ID
;
721 ovsFwdCtx
->fwdDetail
->SourceNicIndex
= 0;
723 status
= OvsDoFlowLookupOutput(ovsFwdCtx
);
724 ASSERT(ovsFwdCtx
->curNbl
== NULL
);
725 OvsClearTunRxCtx(ovsFwdCtx
);
730 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
731 L
"OVS-dropped due to decap failure");
732 OvsClearTunRxCtx(ovsFwdCtx
);
738 * --------------------------------------------------------------------------
739 * OvsOutputForwardingCtx --
740 * This function outputs an NBL to NDIS or to a tunneling pipeline based on
741 * the ports added so far into 'ovsFwdCtx'.
744 * This function consumes the NBL - either by forwarding it successfully to
745 * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it.
747 * Also makes sure that the list of destination ports - tunnel or otherwise is
749 * --------------------------------------------------------------------------
751 static __inline NDIS_STATUS
752 OvsOutputForwardingCtx(OvsForwardingContext
*ovsFwdCtx
)
754 NDIS_STATUS status
= STATUS_SUCCESS
;
755 POVS_SWITCH_CONTEXT switchContext
= ovsFwdCtx
->switchContext
;
758 * Handle the case where the some of the destination ports are tunneled
759 * ports - the non-tunneled ports get a unmodified copy of the NBL, and the
760 * tunneling pipeline starts when we output the packet to tunneled port.
762 if (ovsFwdCtx
->destPortsSizeOut
> 0) {
763 PNET_BUFFER_LIST newNbl
= NULL
;
765 UINT32 portsToUpdate
=
766 ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
-
767 (ovsFwdCtx
->destPortsSizeIn
- ovsFwdCtx
->destPortsSizeOut
);
769 ASSERT(ovsFwdCtx
->destinationPorts
!= NULL
);
772 * Create a copy of the packet in order to do encap on it later. Also,
773 * don't copy the offload context since the encap'd packet has a
774 * different set of headers. This will change when we implement offloads
775 * before doing encapsulation.
777 if (ovsFwdCtx
->tunnelTxNic
!= NULL
|| ovsFwdCtx
->tunnelRxNic
!= NULL
) {
778 nb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
779 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
780 0, 0, TRUE
/*copy NBL info*/);
781 if (newNbl
== NULL
) {
782 status
= NDIS_STATUS_RESOURCES
;
783 ovsActionStats
.noCopiedNbl
++;
788 /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */
789 ASSERT(portsToUpdate
> 0);
790 status
= switchContext
->NdisSwitchHandlers
.UpdateNetBufferListDestinations(
791 switchContext
->NdisSwitchContext
, ovsFwdCtx
->curNbl
,
792 portsToUpdate
, ovsFwdCtx
->destinationPorts
);
793 if (status
!= NDIS_STATUS_SUCCESS
) {
794 OvsCompleteNBL(ovsFwdCtx
->switchContext
, newNbl
, TRUE
);
795 ovsActionStats
.cannotGrowDest
++;
799 OvsSendNBLIngress(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
800 ovsFwdCtx
->sendFlags
);
801 /* End this pipeline by resetting the corresponding context. */
802 ovsFwdCtx
->destPortsSizeOut
= 0;
803 ovsFwdCtx
->curNbl
= NULL
;
805 status
= OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
806 newNbl
, ovsFwdCtx
->srcVportNo
, 0,
807 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
808 ovsFwdCtx
->completionList
,
809 &ovsFwdCtx
->layers
, FALSE
);
810 if (status
!= NDIS_STATUS_SUCCESS
) {
811 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
812 L
"Dropped due to resouces");
818 if (ovsFwdCtx
->tunnelTxNic
!= NULL
) {
819 status
= OvsTunnelPortTx(ovsFwdCtx
);
820 ASSERT(ovsFwdCtx
->tunnelTxNic
== NULL
);
821 ASSERT(ovsFwdCtx
->tunKey
.dst
== 0);
822 } else if (ovsFwdCtx
->tunnelRxNic
!= NULL
) {
823 status
= OvsTunnelPortRx(ovsFwdCtx
);
824 ASSERT(ovsFwdCtx
->tunnelRxNic
== NULL
);
825 ASSERT(ovsFwdCtx
->tunKey
.dst
== 0);
827 ASSERT(ovsFwdCtx
->curNbl
== NULL
);
832 if (status
!= NDIS_STATUS_SUCCESS
) {
833 OvsCompleteNBLForwardingCtx(ovsFwdCtx
, L
"Dropped due to XXX");
841 * --------------------------------------------------------------------------
842 * OvsLookupFlowOutput --
843 * Utility function for external callers to do flow extract, lookup,
844 * actions execute on a given NBL.
846 * Note: If this is being used from a callback function, make sure that the
847 * arguments specified are still valid in the asynchronous context.
850 * This function consumes the NBL.
851 * --------------------------------------------------------------------------
854 OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext
,
856 PNET_BUFFER_LIST curNbl
)
859 OvsForwardingContext ovsFwdCtx
;
860 POVS_VPORT_ENTRY internalVport
=
861 (POVS_VPORT_ENTRY
)switchContext
->internalVport
;
863 /* XXX: make sure comp list was not a stack variable previously. */
864 OvsCompletionList
*completionList
= (OvsCompletionList
*)compList
;
867 * XXX: can internal port disappear while we are busy doing ARP resolution?
868 * It could, but will we get this callback from IP helper in that case. Need
871 ASSERT(switchContext
->internalVport
);
872 status
= OvsInitForwardingCtx(&ovsFwdCtx
, switchContext
, curNbl
,
873 internalVport
->portNo
, 0,
874 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl
),
875 completionList
, NULL
, TRUE
);
876 if (status
!= NDIS_STATUS_SUCCESS
) {
877 OvsCompleteNBLForwardingCtx(&ovsFwdCtx
,
878 L
"OVS-Dropped due to resources");
884 * XXX: We need to acquire the dispatch lock and the datapath lock.
887 OvsDoFlowLookupOutput(&ovsFwdCtx
);
892 * --------------------------------------------------------------------------
893 * OvsOutputBeforeSetAction --
894 * Function to be called to complete one set of actions on an NBL, before
895 * we start the next one.
896 * --------------------------------------------------------------------------
898 static __inline NDIS_STATUS
899 OvsOutputBeforeSetAction(OvsForwardingContext
*ovsFwdCtx
)
901 PNET_BUFFER_LIST newNbl
;
902 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
906 * Create a copy and work on the copy after this point. The original NBL is
907 * forwarded. One reason to not use the copy for forwarding is that
908 * ports have already been added to the original NBL, and it might be
909 * inefficient/impossible to remove/re-add them to the copy. There's no
910 * notion of removing the ports, the ports need to be marked as
911 * "isExcluded". There's seems no real advantage to retaining the original
912 * and sending out the copy instead.
914 * XXX: We are copying the offload context here. This is to handle actions
916 * outport, pop_vlan(), outport, push_vlan(), outport
918 * copy size needs to include inner ether + IP + TCP, need to revisit
919 * if we support IP options.
920 * XXX Head room needs to include the additional encap.
921 * XXX copySize check is not considering multiple NBs.
923 nb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
924 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
925 0, 0, TRUE
/*copy NBL info*/);
927 ASSERT(ovsFwdCtx
->destPortsSizeOut
> 0 ||
928 ovsFwdCtx
->tunnelTxNic
!= NULL
|| ovsFwdCtx
->tunnelRxNic
!= NULL
);
930 /* Send the original packet out */
931 status
= OvsOutputForwardingCtx(ovsFwdCtx
);
932 ASSERT(ovsFwdCtx
->curNbl
== NULL
);
933 ASSERT(ovsFwdCtx
->destPortsSizeOut
== 0);
934 ASSERT(ovsFwdCtx
->tunnelRxNic
== NULL
);
935 ASSERT(ovsFwdCtx
->tunnelTxNic
== NULL
);
937 /* If we didn't make a copy, can't continue. */
938 if (newNbl
== NULL
) {
939 ovsActionStats
.noCopiedNbl
++;
940 return NDIS_STATUS_RESOURCES
;
943 /* Finish the remaining actions with the new NBL */
944 if (status
!= NDIS_STATUS_SUCCESS
) {
945 OvsCompleteNBL(ovsFwdCtx
->switchContext
, newNbl
, TRUE
);
947 status
= OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
948 newNbl
, ovsFwdCtx
->srcVportNo
, 0,
949 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
950 ovsFwdCtx
->completionList
,
951 &ovsFwdCtx
->layers
, FALSE
);
959 * --------------------------------------------------------------------------
960 * OvsPopVlanInPktBuf --
961 * Function to pop a VLAN tag when the tag is in the packet buffer.
962 * --------------------------------------------------------------------------
964 static __inline NDIS_STATUS
965 OvsPopVlanInPktBuf(OvsForwardingContext
*ovsFwdCtx
)
970 ULONG dataLength
= sizeof (DL_EUI48
) + sizeof (DL_EUI48
);
971 UINT32 packetLen
, mdlLen
;
972 PNET_BUFFER_LIST newNbl
;
976 * Declare a dummy vlanTag structure since we need to compute the size
977 * of shiftLength. The NDIS one is a unionized structure.
979 NDIS_PACKET_8021Q_INFO vlanTag
= {0};
980 ULONG shiftLength
= sizeof (vlanTag
.TagHeader
);
981 PUINT8 tempBuffer
[sizeof (DL_EUI48
) + sizeof (DL_EUI48
)];
983 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
984 0, 0, TRUE
/* copy NBL info */);
986 ovsActionStats
.noCopiedNbl
++;
987 return NDIS_STATUS_RESOURCES
;
990 /* Complete the original NBL and create a copy to modify. */
991 OvsCompleteNBLForwardingCtx(ovsFwdCtx
, L
"OVS-Dropped due to copy");
993 status
= OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
994 newNbl
, ovsFwdCtx
->srcVportNo
, 0,
995 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
996 NULL
, &ovsFwdCtx
->layers
, FALSE
);
997 if (status
!= NDIS_STATUS_SUCCESS
) {
998 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
999 L
"Dropped due to resouces");
1000 return NDIS_STATUS_RESOURCES
;
1003 curNb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
1004 packetLen
= NET_BUFFER_DATA_LENGTH(curNb
);
1005 ASSERT(curNb
->Next
== NULL
);
1006 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
1007 NdisQueryMdl(curMdl
, &bufferStart
, &mdlLen
, LowPagePriority
);
1009 return NDIS_STATUS_RESOURCES
;
1011 mdlLen
-= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1012 /* Bail out if L2 + VLAN header is not contiguous in the first buffer. */
1013 if (MIN(packetLen
, mdlLen
) < sizeof (EthHdr
) + shiftLength
) {
1015 return NDIS_STATUS_FAILURE
;
1017 bufferStart
+= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1018 RtlCopyMemory(tempBuffer
, bufferStart
, dataLength
);
1019 RtlCopyMemory(bufferStart
+ shiftLength
, tempBuffer
, dataLength
);
1020 NdisAdvanceNetBufferDataStart(curNb
, shiftLength
, FALSE
, NULL
);
1022 return NDIS_STATUS_SUCCESS
;
1026 * --------------------------------------------------------------------------
1027 * OvsTunnelAttrToIPv4TunnelKey --
1028 * Convert tunnel attribute to OvsIPv4TunnelKey.
1029 * --------------------------------------------------------------------------
/*
 * Fills '*tunKey' from the nested netlink attributes carried inside 'attr',
 * which is asserted to be of type OVS_KEY_ATTR_TUNNEL.
 *
 *   attr    - the tunnel attribute whose payload is iterated.
 *   tunKey  - output; its attr[0..2] words are zeroed before any field is
 *             written.
 *
 * The only return statement visible in this function yields
 * NDIS_STATUS_SUCCESS.
 * NOTE(review): the handling of attribute types not listed below is not
 * visible in this view - confirm against the full file.
 */
1031 static __inline NDIS_STATUS
1032 OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr
,
1033 OvsIPv4TunnelKey
*tunKey
)
/* Start from an all-zero key (presumably attr[] overlays the named fields
 * written below - TODO confirm against the OvsIPv4TunnelKey definition). */
1038 tunKey
->attr
[0] = 0;
1039 tunKey
->attr
[1] = 0;
1040 tunKey
->attr
[2] = 0;
1041 ASSERT(NlAttrType(attr
) == OVS_KEY_ATTR_TUNNEL
);
/* Walk every nested attribute in the payload of 'attr'. */
1043 NL_ATTR_FOR_EACH_UNSAFE (a
, rem
, NlAttrData(attr
),
1044 NlAttrGetSize(attr
)) {
1045 switch (NlAttrType(a
)) {
/* Tunnel id: store it and flag that a key is present. */
1046 case OVS_TUNNEL_KEY_ATTR_ID
:
1047 tunKey
->tunnelId
= NlAttrGetBe64(a
);
1048 tunKey
->flags
|= OVS_TNL_F_KEY
;
/* Outer IPv4 source address, read as a big-endian 32-bit value. */
1050 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC
:
1051 tunKey
->src
= NlAttrGetBe32(a
);
/* Outer IPv4 destination address, read as a big-endian 32-bit value. */
1053 case OVS_TUNNEL_KEY_ATTR_IPV4_DST
:
1054 tunKey
->dst
= NlAttrGetBe32(a
);
/* Outer TOS byte. */
1056 case OVS_TUNNEL_KEY_ATTR_TOS
:
1057 tunKey
->tos
= NlAttrGetU8(a
);
/* Outer TTL byte. */
1059 case OVS_TUNNEL_KEY_ATTR_TTL
:
1060 tunKey
->ttl
= NlAttrGetU8(a
);
/* Flag-only attributes: their presence sets the matching flag bit. */
1062 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT
:
1063 tunKey
->flags
|= OVS_TNL_F_DONT_FRAGMENT
;
1065 case OVS_TUNNEL_KEY_ATTR_CSUM
:
1066 tunKey
->flags
|= OVS_TNL_F_CSUM
;
1073 return NDIS_STATUS_SUCCESS
;
1077 *----------------------------------------------------------------------------
1078 * OvsUpdateEthHeader --
1079 * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the
1081 *----------------------------------------------------------------------------
/*
 * Overwrites the destination and source MAC addresses of the Ethernet
 * header at the start of the first NET_BUFFER of ovsFwdCtx->curNbl with
 * ethAttr->eth_dst and ethAttr->eth_src. The packet is edited in place; no
 * copy of the NBL is made in this function.
 *
 *   ovsFwdCtx - forwarding context whose curNbl is modified.
 *   ethAttr   - new MAC addresses.
 *
 * Returns NDIS_STATUS_SUCCESS after the rewrite, NDIS_STATUS_FAILURE when
 * fewer than sizeof *ethHdr bytes are available in the first MDL, and
 * NDIS_STATUS_RESOURCES (after incrementing ovsActionStats.noResource) on
 * the failure path that follows NdisQueryMdl().
 */
1083 static __inline NDIS_STATUS
1084 OvsUpdateEthHeader(OvsForwardingContext
*ovsFwdCtx
,
1085 const struct ovs_key_ethernet
*ethAttr
)
1091 UINT32 packetLen
, mdlLen
;
/* Only the first NET_BUFFER is handled; the NBL must not chain more. */
1093 curNb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
1094 ASSERT(curNb
->Next
== NULL
);
1095 packetLen
= NET_BUFFER_DATA_LENGTH(curNb
);
1096 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
/* Obtain the address and length of the current MDL. */
1097 NdisQueryMdl(curMdl
, &bufferStart
, &mdlLen
, LowPagePriority
);
/* NOTE(review): the guard for this failure path is not visible in this
 * view; presumably it tests bufferStart for NULL - confirm. */
1099 ovsActionStats
.noResource
++;
1100 return NDIS_STATUS_RESOURCES
;
/* Bytes of this MDL that lie at or after the packet's data start. */
1102 mdlLen
-= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1104 /* Bail out if the L2 header is not in a contiguous buffer. */
1105 if (MIN(packetLen
, mdlLen
) < sizeof *ethHdr
) {
1107 return NDIS_STATUS_FAILURE
;
/* The Ethernet header sits at the data start inside the current MDL. */
1109 ethHdr
= (EthHdr
*)(bufferStart
+ NET_BUFFER_CURRENT_MDL_OFFSET(curNb
));
/* Replace both MAC addresses with the values from the action. */
1111 RtlCopyMemory(ethHdr
->Destination
, ethAttr
->eth_dst
,
1112 sizeof ethHdr
->Destination
);
1113 RtlCopyMemory(ethHdr
->Source
, ethAttr
->eth_src
, sizeof ethHdr
->Source
);
1115 return NDIS_STATUS_SUCCESS
;
1119 *----------------------------------------------------------------------------
1120 * OvsUpdateIPv4Header --
1121 * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
1123 *----------------------------------------------------------------------------
1125 static __inline NDIS_STATUS
1126 OvsUpdateIPv4Header(OvsForwardingContext
*ovsFwdCtx
,
1127 const struct ovs_key_ipv4
*ipAttr
)
1133 UINT32 mdlLen
, hdrSize
, packetLen
;
1134 OVS_PACKET_HDR_INFO
*layers
= &ovsFwdCtx
->layers
;
1137 TCPHdr
*tcpHdr
= NULL
;
1138 UDPHdr
*udpHdr
= NULL
;
1140 ASSERT(layers
->value
!= 0);
1143 * Peek into the MDL to get a handle to the IP header and if required
1144 * the TCP/UDP header as well. We check if the required headers are in one
1145 * contiguous MDL, and if not, we copy them over to one MDL.
1147 curNb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
1148 ASSERT(curNb
->Next
== NULL
);
1149 packetLen
= NET_BUFFER_DATA_LENGTH(curNb
);
1150 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
1151 NdisQueryMdl(curMdl
, &bufferStart
, &mdlLen
, LowPagePriority
);
1153 ovsActionStats
.noResource
++;
1154 return NDIS_STATUS_RESOURCES
;
1156 curMdlOffset
= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1157 mdlLen
-= curMdlOffset
;
1158 ASSERT((INT
)mdlLen
>= 0);
1160 if (layers
->isTcp
|| layers
->isUdp
) {
1161 hdrSize
= layers
->l4Offset
+
1162 layers
->isTcp
? sizeof (*tcpHdr
) : sizeof (*udpHdr
);
1164 hdrSize
= layers
->l3Offset
+ sizeof (*ipHdr
);
1167 /* Count of number of bytes of valid data there are in the first MDL. */
1168 mdlLen
= MIN(packetLen
, mdlLen
);
1169 if (mdlLen
< hdrSize
) {
1170 PNET_BUFFER_LIST newNbl
;
1171 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
1172 hdrSize
, 0, TRUE
/*copy NBL info*/);
1174 ovsActionStats
.noCopiedNbl
++;
1175 return NDIS_STATUS_RESOURCES
;
1177 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
1178 L
"Complete after partial copy.");
1180 status
= OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
1181 newNbl
, ovsFwdCtx
->srcVportNo
, 0,
1182 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
1183 NULL
, &ovsFwdCtx
->layers
, FALSE
);
1184 if (status
!= NDIS_STATUS_SUCCESS
) {
1185 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
1186 L
"OVS-Dropped due to resources");
1187 return NDIS_STATUS_RESOURCES
;
1190 curNb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
1191 ASSERT(curNb
->Next
== NULL
);
1192 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
1193 NdisQueryMdl(curMdl
, &bufferStart
, &mdlLen
, LowPagePriority
);
1195 ovsActionStats
.noResource
++;
1196 return NDIS_STATUS_RESOURCES
;
1198 curMdlOffset
= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1199 mdlLen
-= curMdlOffset
;
1200 ASSERT(mdlLen
>= hdrSize
);
1203 ipHdr
= (IPHdr
*)(bufferStart
+ curMdlOffset
+ layers
->l3Offset
);
1205 if (layers
->isTcp
) {
1206 tcpHdr
= (TCPHdr
*)(bufferStart
+ curMdlOffset
+ layers
->l4Offset
);
1207 } else if (layers
->isUdp
) {
1208 udpHdr
= (UDPHdr
*)(bufferStart
+ curMdlOffset
+ layers
->l4Offset
);
1212 * Adjust the IP header inline as dictated by the action, nad also update
1213 * the IP and the TCP checksum for the data modified.
1215 * In the future, this could be optimized to make one call to
1216 * ChecksumUpdate32(). Ignoring this for now, since for the most common
1217 * case, we only update the TTL.
1219 if (ipHdr
->saddr
!= ipAttr
->ipv4_src
) {
1221 tcpHdr
->check
= ChecksumUpdate32(tcpHdr
->check
, ipHdr
->saddr
,
1223 } else if (udpHdr
&& udpHdr
->check
) {
1224 udpHdr
->check
= ChecksumUpdate32(udpHdr
->check
, ipHdr
->saddr
,
1228 if (ipHdr
->check
!= 0) {
1229 ipHdr
->check
= ChecksumUpdate32(ipHdr
->check
, ipHdr
->saddr
,
1232 ipHdr
->saddr
= ipAttr
->ipv4_src
;
1234 if (ipHdr
->daddr
!= ipAttr
->ipv4_dst
) {
1236 tcpHdr
->check
= ChecksumUpdate32(tcpHdr
->check
, ipHdr
->daddr
,
1238 } else if (udpHdr
&& udpHdr
->check
) {
1239 udpHdr
->check
= ChecksumUpdate32(udpHdr
->check
, ipHdr
->daddr
,
1243 if (ipHdr
->check
!= 0) {
1244 ipHdr
->check
= ChecksumUpdate32(ipHdr
->check
, ipHdr
->daddr
,
1247 ipHdr
->daddr
= ipAttr
->ipv4_dst
;
1249 if (ipHdr
->protocol
!= ipAttr
->ipv4_proto
) {
1250 UINT16 oldProto
= (ipHdr
->protocol
<< 16) & 0xff00;
1251 UINT16 newProto
= (ipAttr
->ipv4_proto
<< 16) & 0xff00;
1253 tcpHdr
->check
= ChecksumUpdate16(tcpHdr
->check
, oldProto
, newProto
);
1254 } else if (udpHdr
&& udpHdr
->check
) {
1255 udpHdr
->check
= ChecksumUpdate16(udpHdr
->check
, oldProto
, newProto
);
1258 if (ipHdr
->check
!= 0) {
1259 ipHdr
->check
= ChecksumUpdate16(ipHdr
->check
, oldProto
, newProto
);
1261 ipHdr
->protocol
= ipAttr
->ipv4_proto
;
1263 if (ipHdr
->ttl
!= ipAttr
->ipv4_ttl
) {
1264 UINT16 oldTtl
= (ipHdr
->ttl
) & 0xff;
1265 UINT16 newTtl
= (ipAttr
->ipv4_ttl
) & 0xff;
1266 if (ipHdr
->check
!= 0) {
1267 ipHdr
->check
= ChecksumUpdate16(ipHdr
->check
, oldTtl
, newTtl
);
1269 ipHdr
->ttl
= ipAttr
->ipv4_ttl
;
1272 return NDIS_STATUS_SUCCESS
;
1276 * --------------------------------------------------------------------------
1277 * OvsExecuteSetAction --
1278 * Executes a set() action, but storing the actions into 'ovsFwdCtx'
1279 * --------------------------------------------------------------------------
/*
 * Applies one set() sub-action to the packet/context in 'ovsFwdCtx',
 * selecting the handler from the netlink type of attribute 'a'.
 * 'status' starts out as NDIS_STATUS_SUCCESS and is overwritten by the
 * handlers below.
 * NOTE(review): part of the parameter list and the final return statement
 * are not visible in this view; 'key', 'hash' and 'a' are used in the body
 * and are presumably the remaining parameters - confirm.
 */
1281 static __inline NDIS_STATUS
1282 OvsExecuteSetAction(OvsForwardingContext
*ovsFwdCtx
,
1287 enum ovs_key_attr type
= NlAttrType(a
);
1288 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
/* Rewrite the MAC addresses from the attribute payload. */
1291 case OVS_KEY_ATTR_ETHERNET
:
1292 status
= OvsUpdateEthHeader(ovsFwdCtx
,
1293 NlAttrGetUnspec(a
, sizeof(struct ovs_key_ethernet
)));
/* Rewrite IPv4 header fields from the attribute payload. */
1296 case OVS_KEY_ATTR_IPV4
:
1297 status
= OvsUpdateIPv4Header(ovsFwdCtx
,
1298 NlAttrGetUnspec(a
, sizeof(struct ovs_key_ipv4
)));
/*
 * Tunnel: nothing is written to the packet here. The attribute is decoded
 * into a local key, which is then stored in ovsFwdCtx->tunKey.
 */
1301 case OVS_KEY_ATTR_TUNNEL
:
1303 OvsIPv4TunnelKey tunKey
;
1305 status
= OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR
)a
, &tunKey
);
1306 ASSERT(status
== NDIS_STATUS_SUCCESS
);
/* Use the caller-supplied hash when given, else hash the flow key; the
 * value is truncated to 16 bits by the cast. */
1307 tunKey
.flow_hash
= (uint16
)(hash
? *hash
: OvsHashFlow(key
));
1308 RtlCopyMemory(&ovsFwdCtx
->tunKey
, &tunKey
, sizeof ovsFwdCtx
->tunKey
);
1312 case OVS_KEY_ATTR_SKB_MARK
:
1313 /* XXX: Not relevant to Hyper-V. Return OK */
/* The attribute types listed next have no handler; the statement that
 * follows them logs the type as unhandled. */
1315 case OVS_KEY_ATTR_UNSPEC
:
1316 case OVS_KEY_ATTR_ENCAP
:
1317 case OVS_KEY_ATTR_ETHERTYPE
:
1318 case OVS_KEY_ATTR_IN_PORT
:
1319 case OVS_KEY_ATTR_VLAN
:
1320 case OVS_KEY_ATTR_ICMP
:
1321 case OVS_KEY_ATTR_ICMPV6
:
1322 case OVS_KEY_ATTR_ARP
:
1323 case OVS_KEY_ATTR_ND
:
1324 case __OVS_KEY_ATTR_MAX
:
1326 OVS_LOG_INFO("Unhandled attribute %#x", type
);
1333 * --------------------------------------------------------------------------
1334 * OvsActionsExecute --
1335 * Interpret and execute the specified 'actions' on the specified packet
1336 * 'curNbl'. The expectation is that if the packet needs to be dropped
1337 * (completed) for some reason, it is added to 'completionList' so that the
1338 * caller can complete the packet. If 'completionList' is NULL, the NBL is
1339 * assumed to be generated by OVS and freed up. Otherwise, the function
1340 * consumes the NBL by generating a NDIS send indication for the packet.
1342 * There are one or more of "clone" NBLs that may get generated while
1343 * executing the actions. Upon any failures, the "cloned" NBLs are freed up,
1344 * and the caller does not have to worry about them.
1346 * Success or failure is returned based on whether the specified actions
1347 * were executed successfully on the packet or not.
1348 * --------------------------------------------------------------------------
1351 OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext
,
1352 OvsCompletionList
*completionList
,
1353 PNET_BUFFER_LIST curNbl
,
1358 OVS_PACKET_HDR_INFO
*layers
,
1359 const PNL_ATTR actions
,
1365 OvsForwardingContext ovsFwdCtx
;
1366 PCWSTR dropReason
= L
"";
1368 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail
=
1369 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl
);
1371 /* XXX: ASSERT that the flow table lock is held. */
1372 status
= OvsInitForwardingCtx(&ovsFwdCtx
, switchContext
, curNbl
, portNo
,
1373 sendFlags
, fwdDetail
, completionList
,
1375 if (status
!= NDIS_STATUS_SUCCESS
) {
1376 dropReason
= L
"OVS-initing destination port list failed";
1380 if (actionsLen
== 0) {
1381 dropReason
= L
"OVS-Dropped due to Flow action";
1382 ovsActionStats
.zeroActionLen
++;
1386 NL_ATTR_FOR_EACH_UNSAFE (a
, rem
, actions
, actionsLen
) {
1387 switch(NlAttrType(a
)) {
1388 case OVS_ACTION_ATTR_OUTPUT
:
1389 dstPortID
= NlAttrGetU32(a
);
1390 status
= OvsAddPorts(&ovsFwdCtx
, key
, dstPortID
,
1392 if (status
!= NDIS_STATUS_SUCCESS
) {
1393 dropReason
= L
"OVS-adding destination port failed";
1398 case OVS_ACTION_ATTR_PUSH_VLAN
:
1400 struct ovs_action_push_vlan
*vlan
;
1402 PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag
;
1404 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
1405 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
1406 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
1407 if (status
!= NDIS_STATUS_SUCCESS
) {
1408 dropReason
= L
"OVS-adding destination failed";
1413 vlanTagValue
= NET_BUFFER_LIST_INFO(ovsFwdCtx
.curNbl
,
1414 Ieee8021QNetBufferListInfo
);
1415 if (vlanTagValue
!= NULL
) {
1417 * XXX: We don't support double VLAN tag offload. In such cases,
1418 * we need to insert the existing one into the packet buffer,
1419 * and add the new one as offload. This will take care of
1420 * guest tag-in-tag case as well as OVS rules that specify
1425 vlanTag
= (PNDIS_NET_BUFFER_LIST_8021Q_INFO
)(PVOID
*)&vlanTagValue
;
1426 vlan
= (struct ovs_action_push_vlan
*)NlAttrGet((const PNL_ATTR
)a
);
1427 vlanTag
->TagHeader
.VlanId
= ntohs(vlan
->vlan_tci
) & 0xfff;
1428 vlanTag
->TagHeader
.UserPriority
= ntohs(vlan
->vlan_tci
) >> 13;
1430 NET_BUFFER_LIST_INFO(ovsFwdCtx
.curNbl
,
1431 Ieee8021QNetBufferListInfo
) = vlanTagValue
;
1436 case OVS_ACTION_ATTR_POP_VLAN
:
1438 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
1439 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
1440 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
1441 if (status
!= NDIS_STATUS_SUCCESS
) {
1442 dropReason
= L
"OVS-adding destination failed";
1447 if (NET_BUFFER_LIST_INFO(ovsFwdCtx
.curNbl
,
1448 Ieee8021QNetBufferListInfo
) != 0) {
1449 NET_BUFFER_LIST_INFO(ovsFwdCtx
.curNbl
,
1450 Ieee8021QNetBufferListInfo
) = 0;
1453 * The VLAN tag is inserted into the packet buffer. Pop the tag
1454 * by packet buffer modification.
1456 status
= OvsPopVlanInPktBuf(&ovsFwdCtx
);
1457 if (status
!= NDIS_STATUS_SUCCESS
) {
1458 dropReason
= L
"OVS-pop vlan action failed";
1465 case OVS_ACTION_ATTR_USERSPACE
:
1467 PNL_ATTR userdataAttr
;
1469 POVS_PACKET_QUEUE_ELEM elem
;
1470 UINT32 queueId
= OVS_DEFAULT_PACKET_QUEUE
;
1471 //XXX confusing that portNo is actually portId for external port.
1472 BOOLEAN isRecv
= (portNo
== switchContext
->externalPortId
)
1473 || OvsIsTunnelVportNo(portNo
);
1475 queueAttr
= NlAttrFindNested(a
, OVS_USERSPACE_ATTR_PID
);
1476 userdataAttr
= NlAttrFindNested(a
, OVS_USERSPACE_ATTR_USERDATA
);
1478 elem
= OvsCreateQueuePacket(queueId
, (PVOID
)userdataAttr
,
1479 userdataAttr
->nlaLen
,
1480 OVS_PACKET_CMD_ACTION
,
1481 portNo
, (OvsIPv4TunnelKey
*)&key
->tunKey
,
1483 NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
.curNbl
),
1487 LIST_ENTRY missedPackets
;
1488 InitializeListHead(&missedPackets
);
1489 InsertTailList(&missedPackets
, &elem
->link
);
1490 OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE
, &missedPackets
, 1);
1491 dropReason
= L
"OVS-Completed since packet was copied to "
1494 dropReason
= L
"OVS-Dropped due to failure to queue to "
1500 case OVS_ACTION_ATTR_SET
:
1502 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
1503 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
1504 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
1505 if (status
!= NDIS_STATUS_SUCCESS
) {
1506 dropReason
= L
"OVS-adding destination failed";
1511 status
= OvsExecuteSetAction(&ovsFwdCtx
, key
, hash
,
1512 (const PNL_ATTR
)NlAttrGet
1513 ((const PNL_ATTR
)a
));
1514 if (status
!= NDIS_STATUS_SUCCESS
) {
1515 dropReason
= L
"OVS-set action failed";
1520 case OVS_ACTION_ATTR_SAMPLE
:
1522 case OVS_ACTION_ATTR_UNSPEC
:
1523 case __OVS_ACTION_ATTR_MAX
:
1529 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
1530 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
1531 status
= OvsOutputForwardingCtx(&ovsFwdCtx
);
1532 ASSERT(ovsFwdCtx
.curNbl
== NULL
);
1535 ASSERT(ovsFwdCtx
.destPortsSizeOut
== 0);
1536 ASSERT(ovsFwdCtx
.tunnelRxNic
== NULL
);
1537 ASSERT(ovsFwdCtx
.tunnelTxNic
== NULL
);
1541 * If curNbl != NULL, it implies the NBL has not been freed up so far.
1543 if (ovsFwdCtx
.curNbl
) {
1544 OvsCompleteNBLForwardingCtx(&ovsFwdCtx
, dropReason
);