2 * Copyright (c) 2014, 2016 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
20 #include "Conntrack.h"
37 #include "IpFragment.h"
42 #define OVS_DBG_MOD OVS_DBG_ACTION
44 #define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
/*
 * Counters kept by the action pipeline. Every visible field is a UINT32.
 * Within this file failedFlowMiss, failedFlowExtract, cannotGrowDest and
 * failedChecksum are incremented on the corresponding failure paths.
 * NOTE(review): other members are incremented by functions in this file
 * (for example noVport, noCopiedNbl, failedEncap, failedDecap and the
 * per-tunnel rx/tx counters) but their field lines are not visible in this
 * listing - confirm against the full declaration.
 * ovsActionStats is the single instance those functions update.
 */
46 typedef struct _OVS_ACTION_STATS
{
58 UINT32 failedFlowMiss
;
60 UINT32 failedFlowExtract
;
65 UINT32 cannotGrowDest
;
67 UINT32 failedChecksum
;
68 UINT32 deferredActionsQueueFull
;
69 UINT32 deferredActionsExecLimit
;
70 } OVS_ACTION_STATS
, *POVS_ACTION_STATS
;
72 OVS_ACTION_STATS ovsActionStats
;
75 * --------------------------------------------------------------------------
76 * OvsInitForwardingCtx --
77 * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
81 * NDIS_STATUS_SUCCESS on success
82 * Other NDIS_STATUS upon failure. Upon failure, it is safe to call
83 * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
84 * enough for OvsCompleteNBLForwardingCtx() to do its work.
85 * --------------------------------------------------------------------------
/*
 * Fills in ovsFwdCtx for a pass of the action pipeline over curNbl.
 *
 * Visible effects, in order:
 *  - curNbl, switchContext, completionList and fwdDetail are stored first,
 *    so that cleanup has enough information if a later step fails.
 *  - When fwdDetail->NumAvailableDestinations > 0 the destination array is
 *    fetched through NdisSwitchHandlers.GetNetBufferListDestinations() and
 *    asserted to be non-NULL with NumElements equal to
 *    NumAvailableDestinations.
 *  - destPortsSizeIn is set to NumAvailableDestinations, destPortsSizeOut
 *    to 0, and srcVportNo / sendFlags are recorded.
 *  - layers is populated (copied from *layers or zeroed).
 *  - When resetTunnelInfo is set, tunnelTxNic, tunnelRxNic and tunKey are
 *    cleared.
 * The only return visible here is NDIS_STATUS_SUCCESS.
 *
 * NOTE(review): this listing drops some original lines (the embedded line
 * numbers skip). The srcVportNo and sendFlags parameter lines, the branch
 * keyword before destinationPorts = NULL, and the condition selecting
 * between copying and zeroing layers are not visible. Presumably the
 * latter is a NULL check on the layers argument - confirm against the full
 * file.
 */
87 static __inline NDIS_STATUS
88 OvsInitForwardingCtx(OvsForwardingContext
*ovsFwdCtx
,
89 POVS_SWITCH_CONTEXT switchContext
,
90 PNET_BUFFER_LIST curNbl
,
93 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail
,
94 OvsCompletionList
*completionList
,
95 OVS_PACKET_HDR_INFO
*layers
,
96 BOOLEAN resetTunnelInfo
)
99 ASSERT(switchContext
);
104 * Set values for curNbl and switchContext so upon failures, we have enough
105 * information to do cleanup.
107 ovsFwdCtx
->curNbl
= curNbl
;
108 ovsFwdCtx
->switchContext
= switchContext
;
109 ovsFwdCtx
->completionList
= completionList
;
110 ovsFwdCtx
->fwdDetail
= fwdDetail
;
112 if (fwdDetail
->NumAvailableDestinations
> 0) {
114 * XXX: even though MSDN says GetNetBufferListDestinations() returns
115 * NDIS_STATUS, the header files say otherwise.
117 switchContext
->NdisSwitchHandlers
.GetNetBufferListDestinations(
118 switchContext
->NdisSwitchContext
, curNbl
,
119 &ovsFwdCtx
->destinationPorts
);
121 ASSERT(ovsFwdCtx
->destinationPorts
);
122 /* Ensure that none of the elements are consumed yet. */
123 ASSERT(ovsFwdCtx
->destinationPorts
->NumElements
==
124 fwdDetail
->NumAvailableDestinations
);
126 ovsFwdCtx
->destinationPorts
= NULL
;
128 ovsFwdCtx
->destPortsSizeIn
= fwdDetail
->NumAvailableDestinations
;
129 ovsFwdCtx
->destPortsSizeOut
= 0;
130 ovsFwdCtx
->srcVportNo
= srcVportNo
;
131 ovsFwdCtx
->sendFlags
= sendFlags
;
133 ovsFwdCtx
->layers
= *layers
;
135 RtlZeroMemory(&ovsFwdCtx
->layers
, sizeof ovsFwdCtx
->layers
);
137 if (resetTunnelInfo
) {
138 ovsFwdCtx
->tunnelTxNic
= NULL
;
139 ovsFwdCtx
->tunnelRxNic
= NULL
;
140 RtlZeroMemory(&ovsFwdCtx
->tunKey
, sizeof ovsFwdCtx
->tunKey
);
143 return NDIS_STATUS_SUCCESS
;
147 * --------------------------------------------------------------------------
148 * OvsDoFragmentNbl --
149 * Utility function to Fragment nbl based on mru.
150 * --------------------------------------------------------------------------
/*
 * Calls OvsFragmentNBL() and stores the result in fragNbl. When the result
 * is non-NULL, the NBL currently held in ovsFwdCtx->curNbl is completed via
 * OvsCompleteNBL() and curNbl is switched to the fragmented NBL. The
 * OVS_LOG_INFO call reports a fragmentation failure together with the mru.
 * NOTE(review): some argument lines of the OvsFragmentNBL() call, the
 * return type line and the branch keyword ahead of the log call are not
 * visible in this listing; presumably the log is the else branch - confirm.
 */
153 OvsDoFragmentNbl(OvsForwardingContext
*ovsFwdCtx
, UINT16 mru
)
155 PNET_BUFFER_LIST fragNbl
= NULL
;
156 fragNbl
= OvsFragmentNBL(ovsFwdCtx
->switchContext
,
158 &(ovsFwdCtx
->layers
),
161 if (fragNbl
!= NULL
) {
162 OvsCompleteNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
, TRUE
);
163 ovsFwdCtx
->curNbl
= fragNbl
;
165 OVS_LOG_INFO("Fragment NBL failed for MRU = %u", mru
);
170 * --------------------------------------------------------------------------
171 * OvsDetectTunnelRxPkt --
172 * Utility function for an RX packet to detect its tunnel type.
175 * True - if the tunnel type was detected.
176 * False - if not a tunnel packet or tunnel type not supported.
177 * --------------------------------------------------------------------------
/*
 * Rx-side tunnel detection based on the extracted flow key.
 *
 * Only packets whose flowKey->ipKey.nwFrag is zero are examined. For those,
 * dstPort is derived from flowKey->ipKey.l4.tpDst with htons(), a tunnel
 * vport is looked up with OvsFindTunnelVportByDstPortAndNWProto(), the
 * per-type rx counter in ovsActionStats is incremented according to
 * tunnelVport->ovsType, and the vport is recorded in
 * ovsFwdCtx->tunnelRxNic (asserted to be NULL beforehand).
 * For IPPROTO_GRE the ASSERT requires dstPort to be 0.
 *
 * NOTE(review): the assignment target of the lookup, the check of its
 * result, the break statements between switch cases and the return
 * statements are not visible in this listing; the True/False contract is
 * the one stated in the function header comment.
 */
179 static __inline BOOLEAN
180 OvsDetectTunnelRxPkt(OvsForwardingContext
*ovsFwdCtx
,
181 const OvsFlowKey
*flowKey
)
183 POVS_VPORT_ENTRY tunnelVport
= NULL
;
185 /* XXX: we should also check for the length of the UDP payload to pick
186 * packets only if they are at least VXLAN header size.
190 * For some of the tunnel types such as GRE, the dstPort is not applicable
191 * since GRE does not have a L4 port. We use '0' for convenience.
193 if (!flowKey
->ipKey
.nwFrag
) {
194 UINT16 dstPort
= htons(flowKey
->ipKey
.l4
.tpDst
);
196 ASSERT(flowKey
->ipKey
.nwProto
!= IPPROTO_GRE
|| dstPort
== 0);
199 OvsFindTunnelVportByDstPortAndNWProto(ovsFwdCtx
->switchContext
,
201 flowKey
->ipKey
.nwProto
);
203 switch(tunnelVport
->ovsType
) {
204 case OVS_VPORT_TYPE_STT
:
205 ovsActionStats
.rxStt
++;
207 case OVS_VPORT_TYPE_VXLAN
:
208 ovsActionStats
.rxVxlan
++;
210 case OVS_VPORT_TYPE_GENEVE
:
211 ovsActionStats
.rxGeneve
++;
213 case OVS_VPORT_TYPE_GRE
:
214 ovsActionStats
.rxGre
++;
220 // We might get tunnel packets even before the tunnel gets initialized.
222 ASSERT(ovsFwdCtx
->tunnelRxNic
== NULL
);
223 ovsFwdCtx
->tunnelRxNic
= tunnelVport
;
231 * --------------------------------------------------------------------------
232 * OvsDetectTunnelPkt --
233 * Utility function to detect if a packet is to be subjected to
234 * tunneling (Tx) or de-tunneling (Rx). Various factors such as source
235 * port, destination port, packet contents, and previously setup tunnel
239 * True - If the packet is to be subjected to tunneling.
240 * In case of invalid tunnel context, the tunneling functionality is
241 * a no-op and is completed within this function itself by consuming
242 * all of the tunneling context.
243 * False - If not a tunnel packet or tunnel type not supported. Caller should
244 * process the packet as a non-tunnel packet.
245 * --------------------------------------------------------------------------
/*
 * Decides whether a packet headed to dstVport enters a tunnel pipeline and
 * records the result in ovsFwdCtx.
 *
 * dstVport is an internal vport type (Rx direction):
 *   The source is treated as valid when fwdDetail->SourcePortId is an
 *   external vport (OvsIsExternalVportByPortId) or equals
 *   NDIS_SWITCH_DEFAULT_PORT_ID. With a valid source,
 *   OvsDetectTunnelRxPkt() is consulted; when it succeeds tunnelRxNic is
 *   asserted non-NULL and tunnelTxNic asserted NULL.
 *
 * dstVport is a tunnel vport type (Tx direction):
 *   tunnelRxNic is asserted NULL. When srcVportNo is not
 *   OVS_DPPORT_NUMBER_INVALID the source vport is looked up, and tunKey.dst
 *   is zeroed when its type is none of NETDEV, INTERNAL or a tunnel type,
 *   which consumes the tunnel context. If tunKey.dst is still non-zero the
 *   per-type tx counter is incremented and tunnelTxNic is set to dstVport.
 *
 * NOTE(review): the return statements, the break lines in the switch and
 * the first part of the source-vport condition are not visible in this
 * listing; see the function header comment for the True/False contract.
 */
247 static __inline BOOLEAN
248 OvsDetectTunnelPkt(OvsForwardingContext
*ovsFwdCtx
,
249 const POVS_VPORT_ENTRY dstVport
,
250 const OvsFlowKey
*flowKey
)
252 if (OvsIsInternalVportType(dstVport
->ovsType
)) {
255 * The source of NBL during tunneling Rx could be the external
256 * port or if it is being executed from userspace, the source port is
259 BOOLEAN validSrcPort
=
260 (OvsIsExternalVportByPortId(ovsFwdCtx
->switchContext
,
261 ovsFwdCtx
->fwdDetail
->SourcePortId
)) ||
262 (ovsFwdCtx
->fwdDetail
->SourcePortId
==
263 NDIS_SWITCH_DEFAULT_PORT_ID
);
265 if (validSrcPort
&& OvsDetectTunnelRxPkt(ovsFwdCtx
, flowKey
)) {
266 ASSERT(ovsFwdCtx
->tunnelTxNic
== NULL
);
267 ASSERT(ovsFwdCtx
->tunnelRxNic
!= NULL
);
270 } else if (OvsIsTunnelVportType(dstVport
->ovsType
)) {
271 ASSERT(ovsFwdCtx
->tunnelRxNic
== NULL
);
275 * The destination port is a tunnel port. Encapsulation must be
276 * performed only on packets that originate from:
278 * - a bridge-internal port (packets generated from userspace)
281 * If the packet will not be encapsulated, consume the tunnel context
284 if (ovsFwdCtx
->srcVportNo
!= OVS_DPPORT_NUMBER_INVALID
) {
286 POVS_VPORT_ENTRY vport
= OvsFindVportByPortNo(
287 ovsFwdCtx
->switchContext
, ovsFwdCtx
->srcVportNo
);
290 (vport
->ovsType
!= OVS_VPORT_TYPE_NETDEV
&&
291 vport
->ovsType
!= OVS_VPORT_TYPE_INTERNAL
&&
292 !OvsIsTunnelVportType(vport
->ovsType
))) {
293 ovsFwdCtx
->tunKey
.dst
= 0;
297 /* Tunnel the packet only if tunnel context is set. */
298 if (ovsFwdCtx
->tunKey
.dst
!= 0) {
299 switch(dstVport
->ovsType
) {
300 case OVS_VPORT_TYPE_GRE
:
301 ovsActionStats
.txGre
++;
303 case OVS_VPORT_TYPE_VXLAN
:
304 ovsActionStats
.txVxlan
++;
306 case OVS_VPORT_TYPE_STT
:
307 ovsActionStats
.txStt
++;
309 case OVS_VPORT_TYPE_GENEVE
:
310 ovsActionStats
.txGeneve
++;
313 ovsFwdCtx
->tunnelTxNic
= dstVport
;
324 * --------------------------------------------------------------------------
326 * Add the specified destination vport into the forwarding context. If the
327 * vport is a VIF/external port, it is added directly to the NBL. If it is
328 * a tunneling port, it is NOT added to the NBL.
331 * NDIS_STATUS_SUCCESS on success
332 * Other NDIS_STATUS upon failure.
333 * --------------------------------------------------------------------------
/*
 * Adds the vport identified by dstPortId as an output for ovsFwdCtx->curNbl.
 *
 * Steps visible below:
 *  1. Look up the vport with OvsFindVportByPortNo(). If it is NULL or not
 *     OVS_STATE_CONNECTED, count noVport and return NDIS_STATUS_SUCCESS
 *     without adding a destination.
 *  2. Update the vport tx statistics: txPackets by one, txBytes by the data
 *     length of the first NET_BUFFER of curNbl.
 *  3. If OvsDetectTunnelPkt() reports a tunnel packet, return
 *     NDIS_STATUS_SUCCESS; nothing is written to the NDIS destination
 *     array in that case.
 *  4. When destPortsSizeOut == destPortsSizeIn the array has no free slot:
 *     - with destPortsSizeIn == 0, GrowNetBufferListDestinations() is asked
 *       for OVS_DEST_PORTS_ARRAY_MIN_SIZE elements and destPortsSizeIn is
 *       reloaded from fwdDetail->NumAvailableDestinations;
 *     - otherwise the pending destinations are committed with
 *       UpdateNetBufferListDestinations(), the array is grown by
 *       destPortsSizeIn elements, and destPortsSizeIn is doubled.
 *     Each failing NDIS call increments cannotGrowDest.
 *  5. The slot at index destPortsSizeOut is filled in (PortId, NicIndex,
 *     IsExcluded = 0, PreserveVLAN, PreservePriority) and destPortsSizeOut
 *     is incremented.
 *
 * NOTE(review): flowKey and status are used below, but the flowKey
 * parameter line and the status declaration are not visible in this
 * listing. The returns that follow each cannotGrowDest increment and the
 * assignment of fwdPort are not visible either - confirm against the full
 * file.
 */
335 static __inline NDIS_STATUS
336 OvsAddPorts(OvsForwardingContext
*ovsFwdCtx
,
338 NDIS_SWITCH_PORT_ID dstPortId
,
339 BOOLEAN preserveVLAN
,
340 BOOLEAN preservePriority
)
342 POVS_VPORT_ENTRY vport
;
343 PNDIS_SWITCH_PORT_DESTINATION fwdPort
;
345 POVS_SWITCH_CONTEXT switchContext
= ovsFwdCtx
->switchContext
;
348 * We hold the dispatch lock that protects the list of vports, so vports
349 * validated here can be added as destinations safely before we call into
352 * Some of the vports can be tunnelled ports as well in which case
353 * they should be added to a separate list of tunnelled destination ports
354 * instead of the VIF ports. The context for the tunnel is settable
355 * in OvsForwardingContext.
357 vport
= OvsFindVportByPortNo(ovsFwdCtx
->switchContext
, dstPortId
);
358 if (vport
== NULL
|| vport
->ovsState
!= OVS_STATE_CONNECTED
) {
360 * There may be some latency between a port disappearing, and userspace
361 * updating the recalculated flows. In the meantime, handle invalid
364 ovsActionStats
.noVport
++;
365 return NDIS_STATUS_SUCCESS
;
367 ASSERT(vport
->nicState
== NdisSwitchNicStateConnected
);
368 vport
->stats
.txPackets
++;
369 vport
->stats
.txBytes
+=
370 NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
));
372 if (OvsDetectTunnelPkt(ovsFwdCtx
, vport
, flowKey
)) {
373 return NDIS_STATUS_SUCCESS
;
376 if (ovsFwdCtx
->destPortsSizeOut
== ovsFwdCtx
->destPortsSizeIn
) {
377 if (ovsFwdCtx
->destPortsSizeIn
== 0) {
378 ASSERT(ovsFwdCtx
->destinationPorts
== NULL
);
379 ASSERT(ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
== 0);
381 switchContext
->NdisSwitchHandlers
.GrowNetBufferListDestinations(
382 switchContext
->NdisSwitchContext
, ovsFwdCtx
->curNbl
,
383 OVS_DEST_PORTS_ARRAY_MIN_SIZE
,
384 &ovsFwdCtx
->destinationPorts
);
385 if (status
!= NDIS_STATUS_SUCCESS
) {
386 ovsActionStats
.cannotGrowDest
++;
389 ovsFwdCtx
->destPortsSizeIn
=
390 ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
;
391 ASSERT(ovsFwdCtx
->destinationPorts
);
393 ASSERT(ovsFwdCtx
->destinationPorts
!= NULL
);
396 * A ULONG value that specifies the total number of
397 * NDIS_SWITCH_PORT_DESTINATION elements in the
398 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
401 * A ULONG value that specifies the number of
402 * NDIS_SWITCH_PORT_DESTINATION elements in the
403 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
404 * specify port destinations.
406 * NumAvailableDestinations:
407 * A value that specifies the number of unused extensible switch
408 * destination ports elements within an NET_BUFFER_LIST structure.
410 ASSERT(ovsFwdCtx
->destinationPorts
->NumElements
==
411 ovsFwdCtx
->destPortsSizeIn
);
412 ASSERT(ovsFwdCtx
->destinationPorts
->NumDestinations
==
413 ovsFwdCtx
->destPortsSizeOut
-
414 ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
);
415 ASSERT(ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
> 0);
417 * Before we grow the array of destination ports, the current set
418 * of ports needs to be committed. Only the ports added since the
419 * last commit need to be part of the new update.
421 status
= switchContext
->NdisSwitchHandlers
.UpdateNetBufferListDestinations(
422 switchContext
->NdisSwitchContext
, ovsFwdCtx
->curNbl
,
423 ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
,
424 ovsFwdCtx
->destinationPorts
);
425 if (status
!= NDIS_STATUS_SUCCESS
) {
426 ovsActionStats
.cannotGrowDest
++;
429 ASSERT(ovsFwdCtx
->destinationPorts
->NumElements
==
430 ovsFwdCtx
->destPortsSizeIn
);
431 ASSERT(ovsFwdCtx
->destinationPorts
->NumDestinations
==
432 ovsFwdCtx
->destPortsSizeOut
);
433 ASSERT(ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
== 0);
435 status
= switchContext
->NdisSwitchHandlers
.GrowNetBufferListDestinations(
436 switchContext
->NdisSwitchContext
, ovsFwdCtx
->curNbl
,
437 ovsFwdCtx
->destPortsSizeIn
, &ovsFwdCtx
->destinationPorts
);
438 if (status
!= NDIS_STATUS_SUCCESS
) {
439 ovsActionStats
.cannotGrowDest
++;
442 ASSERT(ovsFwdCtx
->destinationPorts
!= NULL
);
443 ovsFwdCtx
->destPortsSizeIn
<<= 1;
447 ASSERT(ovsFwdCtx
->destPortsSizeOut
< ovsFwdCtx
->destPortsSizeIn
);
449 NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx
->destinationPorts
,
450 ovsFwdCtx
->destPortsSizeOut
);
452 fwdPort
->PortId
= vport
->portId
;
453 fwdPort
->NicIndex
= vport
->nicIndex
;
454 fwdPort
->IsExcluded
= 0;
455 fwdPort
->PreserveVLAN
= preserveVLAN
;
456 fwdPort
->PreservePriority
= preservePriority
;
457 ovsFwdCtx
->destPortsSizeOut
+= 1;
459 return NDIS_STATUS_SUCCESS
;
464 * --------------------------------------------------------------------------
465 * OvsClearTunTxCtx --
466 * Utility function to clear tx tunneling context.
467 * --------------------------------------------------------------------------
/*
 * Drops the Tx tunnel state held in ovsFwdCtx: tunnelTxNic is set to NULL
 * and tunKey.dst is set to 0. Other code in this file treats a zero
 * tunKey.dst as an unset tunnel context.
 */
470 OvsClearTunTxCtx(OvsForwardingContext
*ovsFwdCtx
)
472 ovsFwdCtx
->tunnelTxNic
= NULL
;
473 ovsFwdCtx
->tunKey
.dst
= 0;
478 * --------------------------------------------------------------------------
479 * OvsClearTunRxCtx --
480 * Utility function to clear rx tunneling context.
481 * --------------------------------------------------------------------------
/*
 * Drops the Rx tunnel state held in ovsFwdCtx: tunnelRxNic is set to NULL
 * and tunKey.dst is set to 0. Note that tunKey is shared with the Tx side,
 * so this also clears the key used by the Tx tunnel check.
 */
484 OvsClearTunRxCtx(OvsForwardingContext
*ovsFwdCtx
)
486 ovsFwdCtx
->tunnelRxNic
= NULL
;
487 ovsFwdCtx
->tunKey
.dst
= 0;
492 * --------------------------------------------------------------------------
493 * OvsCompleteNBLForwardingCtx --
494 * This utility function is responsible for freeing/completing an NBL - either
495 * by adding it to a completion list or by freeing it.
498 * It also resets the necessary fields in 'ovsFwdCtx'.
499 * --------------------------------------------------------------------------
/*
 * Releases ovsFwdCtx->curNbl and resets per-pipeline fields.
 *
 * dropReason is wrapped into the NDIS_STRING filterReason with
 * RtlInitUnicodeString(). When ovsFwdCtx->completionList is set, the NBL is
 * handed to OvsAddPktCompletionList() together with
 * fwdDetail->SourcePortId, and curNbl is set to NULL. Otherwise the NBL is
 * passed to OvsCompleteNBL(), whose return value is stored back into curNbl
 * and asserted to be NULL. Finally destPortsSizeOut, tunnelTxNic and
 * tunnelRxNic are reset.
 *
 * NOTE(review): the dropReason parameter line, the last argument of
 * OvsAddPktCompletionList() and the else keyword are not visible in this
 * listing. tunKey is not touched by the visible lines.
 */
502 OvsCompleteNBLForwardingCtx(OvsForwardingContext
*ovsFwdCtx
,
505 NDIS_STRING filterReason
;
507 RtlInitUnicodeString(&filterReason
, dropReason
);
508 if (ovsFwdCtx
->completionList
) {
509 OvsAddPktCompletionList(ovsFwdCtx
->completionList
, TRUE
,
510 ovsFwdCtx
->fwdDetail
->SourcePortId
, ovsFwdCtx
->curNbl
, 1,
512 ovsFwdCtx
->curNbl
= NULL
;
514 /* If there is no completionList, we assume this is ovs created NBL */
515 ovsFwdCtx
->curNbl
= OvsCompleteNBL(ovsFwdCtx
->switchContext
,
516 ovsFwdCtx
->curNbl
, TRUE
);
517 ASSERT(ovsFwdCtx
->curNbl
== NULL
);
519 /* XXX: these can be made debug only to save cycles. Ideally the pipeline
520 * using these fields should reset the values at the end of the pipeline. */
521 ovsFwdCtx
->destPortsSizeOut
= 0;
522 ovsFwdCtx
->tunnelTxNic
= NULL
;
523 ovsFwdCtx
->tunnelRxNic
= NULL
;
527 * --------------------------------------------------------------------------
528 * OvsDoFlowLookupOutput --
529 * Function to be used for the second stage of a tunneling workflow, i.e.:
530 * - On the encapsulated packet on Tx path, to do a flow extract, flow
531 * lookup and executing the actions.
532 * - On the decapsulated packet on Rx path, to do a flow extract, flow
533 * lookup and executing the actions.
535 * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
536 * until the new buffer management framework is adopted.
539 * The NBL in 'ovsFwdCtx' is consumed.
540 * --------------------------------------------------------------------------
/*
 * Runs flow extract, flow lookup and action execution on ovsFwdCtx->curNbl.
 *
 * Visible flow:
 *  1. Look up the source vport by ovsFwdCtx->srcVportNo. If it is NULL or
 *     not OVS_STATE_CONNECTED, the NBL is completed, noVport is counted and
 *     NDIS_STATUS_SUCCESS is returned.
 *  2. OvsExtractFlow() fills key and ovsFwdCtx->layers; the tunnel key is
 *     passed only when tunKey.dst != 0. On a non-success status the NBL is
 *     completed and failedFlowExtract is counted.
 *  3. OvsLookupFlow() searches the datapath. On the hit path OvsFlowUsed()
 *     is called, datapath.hits is incremented, the flow actions are run via
 *     OvsDoExecuteActions() and curNbl is set to NULL.
 *  4. On the miss path datapath.misses is incremented and the packet is
 *     packaged with OvsCreateAndAddPackets(OVS_PACKET_CMD_MISS) and queued
 *     with OvsQueuePackets(). The NBL is then completed in both outcomes:
 *     flowMiss is counted on success, failedFlowMiss on failure (status
 *     becomes NDIS_STATUS_FAILURE).
 *
 * NOTE(review): the declarations of hash and num, the assignment of the
 * OvsExtractFlow() result to status, the if/else on the lookup result and
 * the final return are not visible in this listing - confirm against the
 * full file.
 */
542 static __inline NDIS_STATUS
543 OvsDoFlowLookupOutput(OvsForwardingContext
*ovsFwdCtx
)
545 OvsFlowKey key
= { 0 };
546 OvsFlow
*flow
= NULL
;
548 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
549 POVS_VPORT_ENTRY vport
=
550 OvsFindVportByPortNo(ovsFwdCtx
->switchContext
, ovsFwdCtx
->srcVportNo
);
551 if (vport
== NULL
|| vport
->ovsState
!= OVS_STATE_CONNECTED
) {
552 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
553 L
"OVS-Dropped due to internal/tunnel port removal");
554 ovsActionStats
.noVport
++;
555 return NDIS_STATUS_SUCCESS
;
557 ASSERT(vport
->nicState
== NdisSwitchNicStateConnected
);
559 /* Assert that in the Rx direction, key is always setup. */
560 ASSERT(ovsFwdCtx
->tunnelRxNic
== NULL
|| ovsFwdCtx
->tunKey
.dst
!= 0);
562 OvsExtractFlow(ovsFwdCtx
->curNbl
, ovsFwdCtx
->srcVportNo
,
563 &key
, &ovsFwdCtx
->layers
,
564 ovsFwdCtx
->tunKey
.dst
!= 0 ? &ovsFwdCtx
->tunKey
: NULL
);
565 if (status
!= NDIS_STATUS_SUCCESS
) {
566 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
567 L
"OVS-Flow extract failed");
568 ovsActionStats
.failedFlowExtract
++;
572 flow
= OvsLookupFlow(&ovsFwdCtx
->switchContext
->datapath
, &key
, &hash
, FALSE
);
574 OvsFlowUsed(flow
, ovsFwdCtx
->curNbl
, &ovsFwdCtx
->layers
);
575 ovsFwdCtx
->switchContext
->datapath
.hits
++;
576 status
= OvsDoExecuteActions(ovsFwdCtx
->switchContext
,
577 ovsFwdCtx
->completionList
,
579 ovsFwdCtx
->srcVportNo
,
580 ovsFwdCtx
->sendFlags
,
581 &key
, &hash
, &ovsFwdCtx
->layers
,
582 flow
->actions
, flow
->actionsLen
);
583 ovsFwdCtx
->curNbl
= NULL
;
585 LIST_ENTRY missedPackets
;
587 ovsFwdCtx
->switchContext
->datapath
.misses
++;
588 InitializeListHead(&missedPackets
);
589 status
= OvsCreateAndAddPackets(NULL
, 0, OVS_PACKET_CMD_MISS
, vport
,
590 &key
,ovsFwdCtx
->curNbl
,
591 FALSE
, &ovsFwdCtx
->layers
,
592 ovsFwdCtx
->switchContext
, &missedPackets
, &num
);
594 OvsQueuePackets(&missedPackets
, num
);
596 if (status
== NDIS_STATUS_SUCCESS
) {
597 /* Complete the packet since it was copied to user buffer. */
598 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
599 L
"OVS-Dropped since packet was copied to userspace");
600 ovsActionStats
.flowMiss
++;
601 status
= NDIS_STATUS_SUCCESS
;
603 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
604 L
"OVS-Dropped due to failure to queue to userspace");
605 status
= NDIS_STATUS_FAILURE
;
606 ovsActionStats
.failedFlowMiss
++;
614 * --------------------------------------------------------------------------
616 * The start function for Tx tunneling - encapsulates the packet, and
617 * outputs the packet on the PIF bridge.
620 * The NBL in 'ovsFwdCtx' is consumed.
621 * --------------------------------------------------------------------------
/*
 * Tx tunnel entry point: encapsulates ovsFwdCtx->curNbl for the tunnel
 * vport in ovsFwdCtx->tunnelTxNic and re-runs flow lookup on the result.
 *
 * Visible flow:
 *  1. If the switch has no internal vport (countInternalVports <= 0) or no
 *     virtualExternalVport, the Tx tunnel context is cleared, the NBL is
 *     completed and NDIS_STATUS_FAILURE is returned.
 *  2. The buffer context of curNbl is read and OvsDoFragmentNbl() is called
 *     with ctx->mru.
 *  3. The encapsulation routine matching tunnelTxNic->ovsType (GRE, VXLAN,
 *     STT, GENEVE) produces newNbl and fills switchFwdInfo.
 *  4. OvsClearTunTxCtx() resets the tunnel context.
 *  5. On success with a non-NULL switchFwdInfo.vport, the port number, port
 *     id and NIC index of that vport are saved, the old NBL is completed,
 *     the context is re-initialized around newNbl, the forwarding detail
 *     source fields are overwritten, and OvsDoFlowLookupOutput() runs.
 *  6. Otherwise the NBL is completed, failedEncap is counted and status is
 *     set to NDIS_STATUS_SUCCESS.
 *
 * NOTE(review): the srcVportNo declaration, the condition guarding the
 * fragmentation call, the break/default lines of the switch, the else
 * keyword and the final return are not visible in this listing. The status
 * returned by OvsInitForwardingCtx() is overwritten by the later
 * OvsDoFlowLookupOutput() call without a visible check.
 */
623 static __inline NDIS_STATUS
624 OvsTunnelPortTx(OvsForwardingContext
*ovsFwdCtx
)
626 NDIS_STATUS status
= NDIS_STATUS_FAILURE
;
627 PNET_BUFFER_LIST newNbl
= NULL
;
629 NDIS_SWITCH_NIC_INDEX srcNicIndex
;
630 NDIS_SWITCH_PORT_ID srcPortId
;
631 POVS_BUFFER_CONTEXT ctx
;
634 * Setup the source port to be the internal port to as to facilitate the
635 * second OvsLookupFlow.
637 if (ovsFwdCtx
->switchContext
->countInternalVports
<= 0 ||
638 ovsFwdCtx
->switchContext
->virtualExternalVport
== NULL
) {
639 OvsClearTunTxCtx(ovsFwdCtx
);
640 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
641 L
"OVS-Dropped since either internal or external port is absent");
642 return NDIS_STATUS_FAILURE
;
645 ctx
= (POVS_BUFFER_CONTEXT
)NET_BUFFER_LIST_CONTEXT_DATA_START(ovsFwdCtx
->curNbl
);
647 OvsDoFragmentNbl(ovsFwdCtx
, ctx
->mru
);
649 OVS_FWD_INFO switchFwdInfo
= { 0 };
650 /* Apply the encapsulation. The encapsulation will not consume the NBL. */
651 switch(ovsFwdCtx
->tunnelTxNic
->ovsType
) {
652 case OVS_VPORT_TYPE_GRE
:
653 status
= OvsEncapGre(ovsFwdCtx
->tunnelTxNic
, ovsFwdCtx
->curNbl
,
654 &ovsFwdCtx
->tunKey
, ovsFwdCtx
->switchContext
,
655 &ovsFwdCtx
->layers
, &newNbl
, &switchFwdInfo
);
657 case OVS_VPORT_TYPE_VXLAN
:
658 status
= OvsEncapVxlan(ovsFwdCtx
->tunnelTxNic
, ovsFwdCtx
->curNbl
,
659 &ovsFwdCtx
->tunKey
, ovsFwdCtx
->switchContext
,
660 &ovsFwdCtx
->layers
, &newNbl
, &switchFwdInfo
);
662 case OVS_VPORT_TYPE_STT
:
663 status
= OvsEncapStt(ovsFwdCtx
->tunnelTxNic
, ovsFwdCtx
->curNbl
,
664 &ovsFwdCtx
->tunKey
, ovsFwdCtx
->switchContext
,
665 &ovsFwdCtx
->layers
, &newNbl
, &switchFwdInfo
);
667 case OVS_VPORT_TYPE_GENEVE
:
668 status
= OvsEncapGeneve(ovsFwdCtx
->tunnelTxNic
, ovsFwdCtx
->curNbl
,
669 &ovsFwdCtx
->tunKey
, ovsFwdCtx
->switchContext
,
670 &ovsFwdCtx
->layers
, &newNbl
, &switchFwdInfo
);
673 ASSERT(! "Tx: Unhandled tunnel type");
676 /* Reset the tunnel context so that it doesn't get used after this point. */
677 OvsClearTunTxCtx(ovsFwdCtx
);
679 if (status
== NDIS_STATUS_SUCCESS
&& switchFwdInfo
.vport
!= NULL
) {
682 * Save the 'srcVportNo', 'srcPortId', 'srcNicIndex' so that
683 * this can be applied to the new NBL later on.
685 srcVportNo
= switchFwdInfo
.vport
->portNo
;
686 srcPortId
= switchFwdInfo
.vport
->portId
;
687 srcNicIndex
= switchFwdInfo
.vport
->nicIndex
;
689 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
690 L
"Complete after cloning NBL for encapsulation");
691 status
= OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
692 newNbl
, srcVportNo
, 0,
693 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
694 ovsFwdCtx
->completionList
,
695 &ovsFwdCtx
->layers
, FALSE
);
696 ovsFwdCtx
->curNbl
= newNbl
;
697 /* Update the forwarding detail for the new NBL */
698 ovsFwdCtx
->fwdDetail
->SourcePortId
= srcPortId
;
699 ovsFwdCtx
->fwdDetail
->SourceNicIndex
= srcNicIndex
;
700 status
= OvsDoFlowLookupOutput(ovsFwdCtx
);
701 ASSERT(ovsFwdCtx
->curNbl
== NULL
);
704 * XXX: Temporary freeing of the packet until we register a
705 * callback to IP helper.
707 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
708 L
"OVS-Dropped due to encap failure");
709 ovsActionStats
.failedEncap
++;
710 status
= NDIS_STATUS_SUCCESS
;
717 * --------------------------------------------------------------------------
719 * Decapsulate the incoming NBL based on the tunnel type and goes through
720 * the flow lookup for the inner packet.
722 * Note: IP checksum is validated here, but L4 checksum validation needs
723 * to be done by the corresponding tunnel types.
726 * The NBL in 'ovsFwdCtx' is consumed.
727 * --------------------------------------------------------------------------
/*
 * Rx tunnel entry point: decapsulates ovsFwdCtx->curNbl according to the
 * tunnel vport in ovsFwdCtx->tunnelRxNic and runs flow lookup on the inner
 * packet.
 *
 * Visible flow:
 *  1. OvsValidateIPChecksum() is checked first; a failure increments
 *     failedChecksum and is logged.
 *  2. The decap routine matching tunnelRxVport->ovsType (GRE, VXLAN, STT,
 *     GENEVE) produces newNbl. For STT, a successful decap with a NULL
 *     newNbl changes dropReason to say the segment is cached. An unhandled
 *     type is logged, asserted and reported as NDIS_STATUS_NOT_SUPPORTED.
 *  3. A non-success status increments failedDecap.
 *  4. The original NBL is completed with dropReason. Because that resets
 *     tunnelRxNic, the saved tunnelRxVport is restored before the context
 *     is re-initialized around newNbl.
 *  5. The forwarding detail source is set to NDIS_SWITCH_DEFAULT_PORT_ID
 *     with SourceNicIndex 0, OvsDoFlowLookupOutput() runs, and the Rx
 *     tunnel context is cleared.
 *  6. The lines at the end complete the NBL with a decap-failure reason and
 *     clear the Rx tunnel context.
 *
 * NOTE(review): the jumps or labels that connect the failure checks to the
 * final cleanup lines, the break/default lines of the switch, the condition
 * guarding the new-NBL handling and the final return are not visible in
 * this listing - confirm against the full file.
 */
729 static __inline NDIS_STATUS
730 OvsTunnelPortRx(OvsForwardingContext
*ovsFwdCtx
)
732 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
733 PNET_BUFFER_LIST newNbl
= NULL
;
734 POVS_VPORT_ENTRY tunnelRxVport
= ovsFwdCtx
->tunnelRxNic
;
735 PCWSTR dropReason
= L
"OVS-dropped due to new decap packet";
737 if (OvsValidateIPChecksum(ovsFwdCtx
->curNbl
, &ovsFwdCtx
->layers
)
738 != NDIS_STATUS_SUCCESS
) {
739 ovsActionStats
.failedChecksum
++;
740 OVS_LOG_INFO("Packet dropped due to IP checksum failure.");
745 * Decap port functions should return a new NBL if it was copied, and
746 * this new NBL should be setup as the ovsFwdCtx->curNbl.
749 switch(tunnelRxVport
->ovsType
) {
750 case OVS_VPORT_TYPE_GRE
:
751 status
= OvsDecapGre(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
752 &ovsFwdCtx
->tunKey
, &newNbl
);
754 case OVS_VPORT_TYPE_VXLAN
:
755 status
= OvsDecapVxlan(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
756 &ovsFwdCtx
->tunKey
, &newNbl
);
758 case OVS_VPORT_TYPE_STT
:
759 status
= OvsDecapStt(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
760 &ovsFwdCtx
->tunKey
, &newNbl
);
761 if (status
== NDIS_STATUS_SUCCESS
&& newNbl
== NULL
) {
762 /* This was an STT-LSO Fragment */
763 dropReason
= L
"OVS-STT segment is cached";
766 case OVS_VPORT_TYPE_GENEVE
:
767 status
= OvsDecapGeneve(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
768 &ovsFwdCtx
->tunKey
, &newNbl
);
771 OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
772 tunnelRxVport
->ovsType
);
773 ASSERT(! "Rx: Unhandled tunnel type");
774 status
= NDIS_STATUS_NOT_SUPPORTED
;
777 if (status
!= NDIS_STATUS_SUCCESS
) {
778 ovsActionStats
.failedDecap
++;
783 * tunnelRxNic and other fields will be cleared, re-init the context
786 OvsCompleteNBLForwardingCtx(ovsFwdCtx
, dropReason
);
789 /* Decapsulated packet is in a new NBL */
790 ovsFwdCtx
->tunnelRxNic
= tunnelRxVport
;
791 OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
792 newNbl
, tunnelRxVport
->portNo
, 0,
793 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
794 ovsFwdCtx
->completionList
,
795 &ovsFwdCtx
->layers
, FALSE
);
798 * Set the NBL's SourcePortId and SourceNicIndex to default values to
799 * keep NDIS happy when we forward the packet.
801 ovsFwdCtx
->fwdDetail
->SourcePortId
= NDIS_SWITCH_DEFAULT_PORT_ID
;
802 ovsFwdCtx
->fwdDetail
->SourceNicIndex
= 0;
804 status
= OvsDoFlowLookupOutput(ovsFwdCtx
);
806 ASSERT(ovsFwdCtx
->curNbl
== NULL
);
807 OvsClearTunRxCtx(ovsFwdCtx
);
812 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
813 L
"OVS-dropped due to decap failure");
814 OvsClearTunRxCtx(ovsFwdCtx
);
820 * --------------------------------------------------------------------------
821 * OvsOutputForwardingCtx --
822 * This function outputs an NBL to NDIS or to a tunneling pipeline based on
823 * the ports added so far into 'ovsFwdCtx'.
826 * This function consumes the NBL - either by forwarding it successfully to
827 * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it.
829 * Also makes sure that the list of destination ports - tunnel or otherwise is
831 * --------------------------------------------------------------------------
/*
 * Sends ovsFwdCtx->curNbl to the destinations collected so far and then
 * starts the tunnel pipeline if a tunnel vport was recorded.
 *
 * Visible flow:
 *  1. When destPortsSizeOut > 0 (at least one NDIS destination was added):
 *     - portsToUpdate is NumAvailableDestinations minus the unused slots
 *       (destPortsSizeIn - destPortsSizeOut).
 *     - If a tunnel Tx or Rx vport is also pending, the NBL is duplicated
 *       with OvsPartialCopyNBL() so the tunnel pipeline can work on the
 *       copy; the mru of the old buffer context is carried over. A NULL
 *       copy yields NDIS_STATUS_RESOURCES and counts noCopiedNbl.
 *     - The destinations are committed with
 *       UpdateNetBufferListDestinations(); on failure the copy is
 *       completed and cannotGrowDest is counted.
 *     - OvsDoFragmentNbl() is called with ctx->mru, the NBL is handed to
 *       OvsSendNBLIngress(), and destPortsSizeOut / curNbl are reset.
 *     - The context is re-initialized around newNbl.
 *  2. If tunnelTxNic is set OvsTunnelPortTx() runs, else if tunnelRxNic is
 *     set OvsTunnelPortRx() runs; both are asserted to leave the tunnel
 *     context cleared and curNbl NULL.
 *  3. On a non-success status the NBL is completed with dropReason.
 *
 * NOTE(review): the declarations of nb and dropReason, the jumps taken on
 * the failure paths, the conditions guarding the fragmentation call and the
 * re-initialization, and the final return are not visible in this listing -
 * confirm against the full file.
 */
833 static __inline NDIS_STATUS
834 OvsOutputForwardingCtx(OvsForwardingContext
*ovsFwdCtx
)
836 NDIS_STATUS status
= STATUS_SUCCESS
;
837 POVS_SWITCH_CONTEXT switchContext
= ovsFwdCtx
->switchContext
;
839 POVS_BUFFER_CONTEXT ctx
;
842 * Handle the case where the some of the destination ports are tunneled
843 * ports - the non-tunneled ports get a unmodified copy of the NBL, and the
844 * tunneling pipeline starts when we output the packet to tunneled port.
846 if (ovsFwdCtx
->destPortsSizeOut
> 0) {
847 PNET_BUFFER_LIST newNbl
= NULL
;
849 UINT32 portsToUpdate
=
850 ovsFwdCtx
->fwdDetail
->NumAvailableDestinations
-
851 (ovsFwdCtx
->destPortsSizeIn
- ovsFwdCtx
->destPortsSizeOut
);
853 ASSERT(ovsFwdCtx
->destinationPorts
!= NULL
);
856 * Create a copy of the packet in order to do encap on it later. Also,
857 * don't copy the offload context since the encap'd packet has a
858 * different set of headers. This will change when we implement offloads
859 * before doing encapsulation.
861 if (ovsFwdCtx
->tunnelTxNic
!= NULL
|| ovsFwdCtx
->tunnelRxNic
!= NULL
) {
862 POVS_BUFFER_CONTEXT oldCtx
, newCtx
;
863 nb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
864 oldCtx
= (POVS_BUFFER_CONTEXT
)
865 NET_BUFFER_LIST_CONTEXT_DATA_START(ovsFwdCtx
->curNbl
);
866 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
,
868 0, 0, TRUE
/*copy NBL info*/);
869 if (newNbl
== NULL
) {
870 status
= NDIS_STATUS_RESOURCES
;
871 ovsActionStats
.noCopiedNbl
++;
872 dropReason
= L
"Dropped due to failure to create NBL copy.";
875 newCtx
= (POVS_BUFFER_CONTEXT
)
876 NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl
);
877 newCtx
->mru
= oldCtx
->mru
;
880 /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */
881 ASSERT(portsToUpdate
> 0);
882 status
= switchContext
->NdisSwitchHandlers
.UpdateNetBufferListDestinations(
883 switchContext
->NdisSwitchContext
, ovsFwdCtx
->curNbl
,
884 portsToUpdate
, ovsFwdCtx
->destinationPorts
);
885 if (status
!= NDIS_STATUS_SUCCESS
) {
886 OvsCompleteNBL(ovsFwdCtx
->switchContext
, newNbl
, TRUE
);
887 ovsActionStats
.cannotGrowDest
++;
888 dropReason
= L
"Dropped due to failure to update destinations.";
892 ctx
= (POVS_BUFFER_CONTEXT
)NET_BUFFER_LIST_CONTEXT_DATA_START(ovsFwdCtx
->curNbl
);
894 OvsDoFragmentNbl(ovsFwdCtx
, ctx
->mru
);
897 OvsSendNBLIngress(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
898 ovsFwdCtx
->sendFlags
);
899 /* End this pipeline by resetting the corresponding context. */
900 ovsFwdCtx
->destPortsSizeOut
= 0;
901 ovsFwdCtx
->curNbl
= NULL
;
903 status
= OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
904 newNbl
, ovsFwdCtx
->srcVportNo
, 0,
905 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
906 ovsFwdCtx
->completionList
,
907 &ovsFwdCtx
->layers
, FALSE
);
908 if (status
!= NDIS_STATUS_SUCCESS
) {
909 dropReason
= L
"Dropped due to resouces.";
915 if (ovsFwdCtx
->tunnelTxNic
!= NULL
) {
916 status
= OvsTunnelPortTx(ovsFwdCtx
);
917 ASSERT(ovsFwdCtx
->tunnelTxNic
== NULL
);
918 ASSERT(ovsFwdCtx
->tunKey
.dst
== 0);
919 } else if (ovsFwdCtx
->tunnelRxNic
!= NULL
) {
920 status
= OvsTunnelPortRx(ovsFwdCtx
);
921 ASSERT(ovsFwdCtx
->tunnelRxNic
== NULL
);
922 ASSERT(ovsFwdCtx
->tunKey
.dst
== 0);
924 ASSERT(ovsFwdCtx
->curNbl
== NULL
);
929 if (status
!= NDIS_STATUS_SUCCESS
) {
930 OvsCompleteNBLForwardingCtx(ovsFwdCtx
, dropReason
);
938 * --------------------------------------------------------------------------
939 * OvsLookupFlowOutput --
940 * Utility function for external callers to do flow extract, lookup,
941 * actions execute on a given NBL.
943 * Note: If this is being used from a callback function, make sure that the
944 * arguments specified are still valid in the asynchronous context.
947 * This function consumes the NBL.
948 * --------------------------------------------------------------------------
/*
 * Entry point for callers outside the action pipeline: builds a local
 * OvsForwardingContext around curNbl and runs OvsDoFlowLookupOutput() on
 * it.
 *
 * compList is cast to an OvsCompletionList pointer. The context is
 * initialized with internalVport->portNo as the source vport, send flags
 * 0, a NULL layers pointer and resetTunnelInfo = TRUE. If initialization
 * does not return NDIS_STATUS_SUCCESS the NBL is completed with a
 * resources drop reason.
 *
 * NOTE(review): the compList parameter line, the status declaration, the
 * return type and whatever follows the failure branch (presumably an early
 * return) are not visible in this listing. The return value of
 * OvsDoFlowLookupOutput() is not used by the visible call.
 */
951 OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext
,
953 PNET_BUFFER_LIST curNbl
,
954 POVS_VPORT_ENTRY internalVport
)
957 OvsForwardingContext ovsFwdCtx
;
959 /* XXX: make sure comp list was not a stack variable previously. */
960 OvsCompletionList
*completionList
= (OvsCompletionList
*)compList
;
963 * XXX: can internal port disappear while we are busy doing ARP resolution?
964 * It could, but will we get this callback from IP helper in that case. Need
967 ASSERT(switchContext
->countInternalVports
> 0);
968 status
= OvsInitForwardingCtx(&ovsFwdCtx
, switchContext
, curNbl
,
969 internalVport
->portNo
, 0,
970 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl
),
971 completionList
, NULL
, TRUE
);
972 if (status
!= NDIS_STATUS_SUCCESS
) {
973 OvsCompleteNBLForwardingCtx(&ovsFwdCtx
,
974 L
"OVS-Dropped due to resources");
980 * XXX: We need to acquire the dispatch lock and the datapath lock.
983 OvsDoFlowLookupOutput(&ovsFwdCtx
);
988 * --------------------------------------------------------------------------
989 * OvsOutputBeforeSetAction --
990 * Function to be called to complete one set of actions on an NBL, before
991 * we start the next one.
992 * --------------------------------------------------------------------------
/*
 * Flushes the outputs accumulated on the current NBL and continues the
 * action list on a copy.
 *
 * Visible flow:
 *  1. A copy of curNbl is made up front with OvsPartialCopyNBL().
 *  2. The caller must have something to output: the ASSERT requires a
 *     pending NDIS destination or a pending tunnel Tx/Rx vport.
 *  3. srcVportNo is saved in tempVportNo, then OvsOutputForwardingCtx()
 *     sends the original NBL; afterwards curNbl, destPortsSizeOut and both
 *     tunnel vports are asserted to be reset.
 *  4. If the copy could not be made, noCopiedNbl is counted and
 *     NDIS_STATUS_RESOURCES is returned.
 *  5. If the output step failed the copy is completed; the context is
 *     re-initialized around the copy using the saved source vport number.
 *
 * NOTE(review): the status declaration, the else keyword between the
 * failure branch and the re-initialization, and the final return are not
 * visible in this listing - confirm against the full file.
 */
994 static __inline NDIS_STATUS
995 OvsOutputBeforeSetAction(OvsForwardingContext
*ovsFwdCtx
)
997 PNET_BUFFER_LIST newNbl
;
1001 * Create a copy and work on the copy after this point. The original NBL is
1002 * forwarded. One reason to not use the copy for forwarding is that
1003 * ports have already been added to the original NBL, and it might be
1004 * inefficient/impossible to remove/re-add them to the copy. There's no
1005 * notion of removing the ports, the ports need to be marked as
1006 * "isExcluded". There's seems no real advantage to retaining the original
1007 * and sending out the copy instead.
1009 * XXX: We are copying the offload context here. This is to handle actions
1011 * outport, pop_vlan(), outport, push_vlan(), outport
1013 * copy size needs to include inner ether + IP + TCP, need to revisit
1014 * if we support IP options.
1015 * XXX Head room needs to include the additional encap.
1016 * XXX copySize check is not considering multiple NBs.
1018 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
1019 0, 0, TRUE
/*copy NBL info*/);
1021 ASSERT(ovsFwdCtx
->destPortsSizeOut
> 0 ||
1022 ovsFwdCtx
->tunnelTxNic
!= NULL
|| ovsFwdCtx
->tunnelRxNic
!= NULL
);
1024 /* Send the original packet out and save the original source port number */
1025 UINT32 tempVportNo
= ovsFwdCtx
->srcVportNo
;
1026 status
= OvsOutputForwardingCtx(ovsFwdCtx
);
1027 ASSERT(ovsFwdCtx
->curNbl
== NULL
);
1028 ASSERT(ovsFwdCtx
->destPortsSizeOut
== 0);
1029 ASSERT(ovsFwdCtx
->tunnelRxNic
== NULL
);
1030 ASSERT(ovsFwdCtx
->tunnelTxNic
== NULL
);
1032 /* If we didn't make a copy, can't continue. */
1033 if (newNbl
== NULL
) {
1034 ovsActionStats
.noCopiedNbl
++;
1035 return NDIS_STATUS_RESOURCES
;
1038 /* Finish the remaining actions with the new NBL */
1039 if (status
!= NDIS_STATUS_SUCCESS
) {
1040 OvsCompleteNBL(ovsFwdCtx
->switchContext
, newNbl
, TRUE
);
1042 status
= OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
1043 newNbl
, tempVportNo
, 0,
1044 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
1045 ovsFwdCtx
->completionList
,
1046 &ovsFwdCtx
->layers
, FALSE
);
1054 * --------------------------------------------------------------------------
1055 * OvsPopFieldInPacketBuf --
1056 * Function to pop a specified field of length 'shiftLength' located at
1057 * 'shiftOffset' from the Ethernet header. The data on the left of the
1058 * 'shiftOffset' is right shifted.
1060 * Returns a pointer to the new start in 'bufferData'.
1061 * --------------------------------------------------------------------------
1063 static __inline NDIS_STATUS
1064 OvsPopFieldInPacketBuf(OvsForwardingContext
*ovsFwdCtx
,
1072 UINT32 packetLen
, mdlLen
;
1073 PNET_BUFFER_LIST newNbl
;
1076 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
1077 0, 0, TRUE
/* copy NBL info */);
1079 ovsActionStats
.noCopiedNbl
++;
1080 return NDIS_STATUS_RESOURCES
;
1083 /* Complete the original NBL and create a copy to modify. */
1084 OvsCompleteNBLForwardingCtx(ovsFwdCtx
, L
"OVS-Dropped due to copy");
1086 status
= OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
, newNbl
,
1087 ovsFwdCtx
->srcVportNo
, 0,
1088 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
1089 NULL
, &ovsFwdCtx
->layers
, FALSE
);
1090 if (status
!= NDIS_STATUS_SUCCESS
) {
1091 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
1092 L
"Dropped due to resouces");
1093 return NDIS_STATUS_RESOURCES
;
1096 curNb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
1097 packetLen
= NET_BUFFER_DATA_LENGTH(curNb
);
1098 ASSERT(curNb
->Next
== NULL
);
1099 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
1100 NdisQueryMdl(curMdl
, &bufferStart
, &mdlLen
, LowPagePriority
);
1102 return NDIS_STATUS_RESOURCES
;
1104 mdlLen
-= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1105 /* Bail out if L2 + shiftLength is not contiguous in the first buffer. */
1106 if (MIN(packetLen
, mdlLen
) < sizeof(EthHdr
) + shiftLength
) {
1108 return NDIS_STATUS_FAILURE
;
1110 bufferStart
+= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1111 /* XXX At the momemnt !bufferData means it should be treated as VLAN. We
1112 * should split the function and refactor. */
1114 EthHdr
*ethHdr
= (EthHdr
*)bufferStart
;
1115 /* If the frame is not VLAN make it a no op */
1116 if (ethHdr
->Type
!= ETH_TYPE_802_1PQ_NBO
) {
1117 return NDIS_STATUS_SUCCESS
;
1120 RtlMoveMemory(bufferStart
+ shiftLength
, bufferStart
, shiftOffset
);
1121 NdisAdvanceNetBufferDataStart(curNb
, shiftLength
, FALSE
, NULL
);
1124 *bufferData
= bufferStart
+ shiftLength
;
1127 return NDIS_STATUS_SUCCESS
;
1132 * --------------------------------------------------------------------------
1133 * OvsPopVlanInPktBuf --
1134 * Function to pop a VLAN tag when the tag is in the packet buffer.
1135 * --------------------------------------------------------------------------
1137 static __inline NDIS_STATUS
1138 OvsPopVlanInPktBuf(OvsForwardingContext
*ovsFwdCtx
)
1141 * Declare a dummy vlanTag structure since we need to compute the size
1142 * of shiftLength. The NDIS one is a unionized structure.
1144 NDIS_PACKET_8021Q_INFO vlanTag
= {0};
1145 UINT32 shiftLength
= sizeof(vlanTag
.TagHeader
);
1146 UINT32 shiftOffset
= sizeof(DL_EUI48
) + sizeof(DL_EUI48
);
1148 return OvsPopFieldInPacketBuf(ovsFwdCtx
, shiftOffset
, shiftLength
, NULL
);
1153 * --------------------------------------------------------------------------
1154 * OvsActionMplsPop --
1155 * Function to pop the first MPLS label from the current packet.
1156 * --------------------------------------------------------------------------
1158 static __inline NDIS_STATUS
1159 OvsActionMplsPop(OvsForwardingContext
*ovsFwdCtx
,
1163 OVS_PACKET_HDR_INFO
*layers
= &ovsFwdCtx
->layers
;
1164 EthHdr
*ethHdr
= NULL
;
1166 status
= OvsPopFieldInPacketBuf(ovsFwdCtx
, sizeof(*ethHdr
),
1167 MPLS_HLEN
, (PUINT8
*)ðHdr
);
1168 if (status
== NDIS_STATUS_SUCCESS
) {
1169 if (ethHdr
&& OvsEthertypeIsMpls(ethHdr
->Type
)) {
1170 ethHdr
->Type
= ethertype
;
1173 layers
->l3Offset
-= MPLS_HLEN
;
1174 layers
->l4Offset
-= MPLS_HLEN
;
1182 * --------------------------------------------------------------------------
1183 * OvsActionMplsPush --
1184 * Function to push the MPLS label into the current packet.
1185 * --------------------------------------------------------------------------
1187 static __inline NDIS_STATUS
1188 OvsActionMplsPush(OvsForwardingContext
*ovsFwdCtx
,
1189 const struct ovs_action_push_mpls
*mpls
)
1192 PNET_BUFFER curNb
= NULL
;
1194 PUINT8 bufferStart
= NULL
;
1195 OVS_PACKET_HDR_INFO
*layers
= &ovsFwdCtx
->layers
;
1196 EthHdr
*ethHdr
= NULL
;
1197 MPLSHdr
*mplsHdr
= NULL
;
1198 UINT32 mdlLen
= 0, curMdlOffset
= 0;
1199 PNET_BUFFER_LIST newNbl
;
1201 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
1202 layers
->l3Offset
, MPLS_HLEN
, TRUE
);
1204 ovsActionStats
.noCopiedNbl
++;
1205 return NDIS_STATUS_RESOURCES
;
1207 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
1208 L
"Complete after partial copy.");
1210 status
= OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
1211 newNbl
, ovsFwdCtx
->srcVportNo
, 0,
1212 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
1213 NULL
, &ovsFwdCtx
->layers
, FALSE
);
1214 if (status
!= NDIS_STATUS_SUCCESS
) {
1215 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
1216 L
"OVS-Dropped due to resources");
1217 return NDIS_STATUS_RESOURCES
;
1220 curNb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
1221 ASSERT(curNb
->Next
== NULL
);
1223 status
= NdisRetreatNetBufferDataStart(curNb
, MPLS_HLEN
, 0, NULL
);
1224 if (status
!= NDIS_STATUS_SUCCESS
) {
1228 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
1229 NdisQueryMdl(curMdl
, &bufferStart
, &mdlLen
, LowPagePriority
);
1231 ovsActionStats
.noResource
++;
1232 return NDIS_STATUS_RESOURCES
;
1235 curMdlOffset
= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1236 mdlLen
-= curMdlOffset
;
1237 ASSERT(mdlLen
>= MPLS_HLEN
);
1239 ethHdr
= (EthHdr
*)(bufferStart
+ curMdlOffset
);
1240 RtlMoveMemory(ethHdr
, (UINT8
*)ethHdr
+ MPLS_HLEN
, sizeof(*ethHdr
));
1241 ethHdr
->Type
= mpls
->mpls_ethertype
;
1243 mplsHdr
= (MPLSHdr
*)(ethHdr
+ 1);
1244 mplsHdr
->lse
= mpls
->mpls_lse
;
1246 layers
->l3Offset
+= MPLS_HLEN
;
1247 layers
->l4Offset
+= MPLS_HLEN
;
1249 return NDIS_STATUS_SUCCESS
;
1253 *----------------------------------------------------------------------------
1254 * OvsUpdateEthHeader --
1255 * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the
1257 *----------------------------------------------------------------------------
1259 static __inline NDIS_STATUS
1260 OvsUpdateEthHeader(OvsForwardingContext
*ovsFwdCtx
,
1261 const struct ovs_key_ethernet
*ethAttr
)
1267 UINT32 packetLen
, mdlLen
;
1269 curNb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
1270 ASSERT(curNb
->Next
== NULL
);
1271 packetLen
= NET_BUFFER_DATA_LENGTH(curNb
);
1272 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
1273 NdisQueryMdl(curMdl
, &bufferStart
, &mdlLen
, LowPagePriority
);
1275 ovsActionStats
.noResource
++;
1276 return NDIS_STATUS_RESOURCES
;
1278 mdlLen
-= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1280 /* Bail out if the L2 header is not in a contiguous buffer. */
1281 if (MIN(packetLen
, mdlLen
) < sizeof *ethHdr
) {
1283 return NDIS_STATUS_FAILURE
;
1285 ethHdr
= (EthHdr
*)(bufferStart
+ NET_BUFFER_CURRENT_MDL_OFFSET(curNb
));
1287 RtlCopyMemory(ethHdr
->Destination
, ethAttr
->eth_dst
,
1288 sizeof ethHdr
->Destination
);
1289 RtlCopyMemory(ethHdr
->Source
, ethAttr
->eth_src
, sizeof ethHdr
->Source
);
1291 return NDIS_STATUS_SUCCESS
;
1295 *----------------------------------------------------------------------------
1296 * OvsGetHeaderBySize --
1297 * Tries to retrieve a continuous buffer from 'ovsFwdCtx->curnbl' of size
1299 * If the original buffer is insufficient it will, try to clone the net
1300 * buffer list and force the size.
1301 * Returns 'NULL' on failure or a pointer to the first byte of the data
1302 * in the first net buffer of the net buffer list 'nbl'.
1303 *----------------------------------------------------------------------------
1305 PUINT8
OvsGetHeaderBySize(OvsForwardingContext
*ovsFwdCtx
,
1309 UINT32 mdlLen
, packetLen
;
1314 curNb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
1315 ASSERT(curNb
->Next
== NULL
);
1316 packetLen
= NET_BUFFER_DATA_LENGTH(curNb
);
1317 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
1318 NdisQueryMdl(curMdl
, &start
, &mdlLen
, LowPagePriority
);
1320 ovsActionStats
.noResource
++;
1324 curMdlOffset
= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1325 mdlLen
-= curMdlOffset
;
1326 ASSERT((INT
)mdlLen
>= 0);
1328 /* Count of number of bytes of valid data there are in the first MDL. */
1329 mdlLen
= MIN(packetLen
, mdlLen
);
1330 if (mdlLen
< size
) {
1331 PNET_BUFFER_LIST newNbl
;
1333 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
1334 size
, 0, TRUE
/*copy NBL info*/);
1336 ovsActionStats
.noCopiedNbl
++;
1339 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
1340 L
"Complete after partial copy.");
1342 status
= OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
1343 newNbl
, ovsFwdCtx
->srcVportNo
, 0,
1344 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
1345 NULL
, &ovsFwdCtx
->layers
, FALSE
);
1347 if (status
!= NDIS_STATUS_SUCCESS
) {
1348 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
1349 L
"OVS-Dropped due to resources");
1353 curNb
= NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
);
1354 ASSERT(curNb
->Next
== NULL
);
1355 curMdl
= NET_BUFFER_CURRENT_MDL(curNb
);
1356 NdisQueryMdl(curMdl
, &start
, &mdlLen
, LowPagePriority
);
1358 ovsActionStats
.noResource
++;
1361 curMdlOffset
= NET_BUFFER_CURRENT_MDL_OFFSET(curNb
);
1362 mdlLen
-= curMdlOffset
;
1363 ASSERT(mdlLen
>= size
);
1366 return start
+ curMdlOffset
;
1370 *----------------------------------------------------------------------------
1371 * OvsUpdateUdpPorts --
1372 * Updates the UDP source or destination port in ovsFwdCtx.curNbl inline
1373 * based on the specified key.
1374 *----------------------------------------------------------------------------
1377 OvsUpdateUdpPorts(OvsForwardingContext
*ovsFwdCtx
,
1378 const struct ovs_key_udp
*udpAttr
)
1381 OVS_PACKET_HDR_INFO
*layers
= &ovsFwdCtx
->layers
;
1382 UDPHdr
*udpHdr
= NULL
;
1384 ASSERT(layers
->value
!= 0);
1386 if (!layers
->isUdp
) {
1387 ovsActionStats
.noCopiedNbl
++;
1388 return NDIS_STATUS_FAILURE
;
1391 bufferStart
= OvsGetHeaderBySize(ovsFwdCtx
, layers
->l7Offset
);
1393 return NDIS_STATUS_RESOURCES
;
1396 udpHdr
= (UDPHdr
*)(bufferStart
+ layers
->l4Offset
);
1397 if (udpHdr
->check
) {
1398 if (udpHdr
->source
!= udpAttr
->udp_src
) {
1399 udpHdr
->check
= ChecksumUpdate16(udpHdr
->check
, udpHdr
->source
,
1401 udpHdr
->source
= udpAttr
->udp_src
;
1403 if (udpHdr
->dest
!= udpAttr
->udp_dst
) {
1404 udpHdr
->check
= ChecksumUpdate16(udpHdr
->check
, udpHdr
->dest
,
1406 udpHdr
->dest
= udpAttr
->udp_dst
;
1409 udpHdr
->source
= udpAttr
->udp_src
;
1410 udpHdr
->dest
= udpAttr
->udp_dst
;
1413 return NDIS_STATUS_SUCCESS
;
1417 *----------------------------------------------------------------------------
1418 * OvsUpdateTcpPorts --
1419 * Updates the TCP source or destination port in ovsFwdCtx.curNbl inline
1420 * based on the specified key.
1421 *----------------------------------------------------------------------------
1424 OvsUpdateTcpPorts(OvsForwardingContext
*ovsFwdCtx
,
1425 const struct ovs_key_tcp
*tcpAttr
)
1428 OVS_PACKET_HDR_INFO
*layers
= &ovsFwdCtx
->layers
;
1429 TCPHdr
*tcpHdr
= NULL
;
1431 ASSERT(layers
->value
!= 0);
1433 if (!layers
->isTcp
) {
1434 ovsActionStats
.noCopiedNbl
++;
1435 return NDIS_STATUS_FAILURE
;
1438 bufferStart
= OvsGetHeaderBySize(ovsFwdCtx
, layers
->l7Offset
);
1440 return NDIS_STATUS_RESOURCES
;
1443 tcpHdr
= (TCPHdr
*)(bufferStart
+ layers
->l4Offset
);
1445 if (tcpHdr
->source
!= tcpAttr
->tcp_src
) {
1446 tcpHdr
->check
= ChecksumUpdate16(tcpHdr
->check
, tcpHdr
->source
,
1448 tcpHdr
->source
= tcpAttr
->tcp_src
;
1450 if (tcpHdr
->dest
!= tcpAttr
->tcp_dst
) {
1451 tcpHdr
->check
= ChecksumUpdate16(tcpHdr
->check
, tcpHdr
->dest
,
1453 tcpHdr
->dest
= tcpAttr
->tcp_dst
;
1456 return NDIS_STATUS_SUCCESS
;
/*
 * OvsUpdateAddressAndPort: rewrites one IPv4 address and, for TCP/UDP, one
 * L4 port of ovsFwdCtx.curNbl.  'isSource' selects the source or the
 * destination fields; 'isTx' selects whether the Transmit or the Receive
 * checksum-offload flags of the NBL are consulted.  The IP checksum and the
 * L4 checksum are adjusted incrementally for the values that change.
 * Several short lines of this function are not visible in this excerpt.
 */
1460 *----------------------------------------------------------------------------
1461 * OvsUpdateAddressAndPort --
1462 * Updates the source/destination IP and port fields in
1463 * ovsFwdCtx.curNbl inline based on the specified key.
1464 *----------------------------------------------------------------------------
1467 OvsUpdateAddressAndPort(OvsForwardingContext
*ovsFwdCtx
,
1468 UINT32 newAddr
, UINT16 newPort
,
1469 BOOLEAN isSource
, BOOLEAN isTx
)
1473 OVS_PACKET_HDR_INFO
*layers
= &ovsFwdCtx
->layers
;
1475 TCPHdr
*tcpHdr
= NULL
;
1476 UDPHdr
*udpHdr
= NULL
;
1477 UINT32
*addrField
= NULL
;
1478 UINT16
*portField
= NULL
;
1479 UINT16
*checkField
= NULL
;
1480 BOOLEAN l4Offload
= FALSE
;
1481 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo
;
1483 ASSERT(layers
->value
!= 0);
1485 if (layers
->isTcp
|| layers
->isUdp
) {
/*
 * NOTE(review): '+' binds tighter than '?:', so the statement below is parsed
 * as (l4Offset + isTcp) ? sizeof(*tcpHdr) : sizeof(*udpHdr).  hdrSize then
 * does not include l4Offset, which looks unintended; the likely intent is
 * l4Offset + (isTcp ? sizeof(*tcpHdr) : sizeof(*udpHdr)).  Confirm and fix.
 */
1486 hdrSize
= layers
->l4Offset
+
1487 layers
->isTcp
? sizeof (*tcpHdr
) : sizeof (*udpHdr
);
1489 hdrSize
= layers
->l3Offset
+ sizeof (*ipHdr
);
/* Obtain 'hdrSize' contiguous header bytes; this may replace curNbl. */
1492 bufferStart
= OvsGetHeaderBySize(ovsFwdCtx
, hdrSize
);
1494 return NDIS_STATUS_RESOURCES
;
1497 ipHdr
= (IPHdr
*)(bufferStart
+ layers
->l3Offset
);
1499 if (layers
->isTcp
) {
1500 tcpHdr
= (TCPHdr
*)(bufferStart
+ layers
->l4Offset
);
1501 } else if (layers
->isUdp
) {
1502 udpHdr
= (UDPHdr
*)(bufferStart
+ layers
->l4Offset
);
/* Snapshot of the NBL's TCP/IP checksum offload information. */
1505 csumInfo
.Value
= NET_BUFFER_LIST_INFO(ovsFwdCtx
->curNbl
,
1506 TcpIpChecksumNetBufferListInfo
);
1508 * Adjust the IP header inline as dictated by the action, and also update
1509 * the IP and the TCP checksum for the data modified.
1511 * In the future, this could be optimized to make one call to
1512 * ChecksumUpdate32(). Ignoring this for now, since for the most common
1513 * case, we only update the TTL.
1517 addrField
= &ipHdr
->saddr
;
1519 portField
= &tcpHdr
->source
;
1520 checkField
= &tcpHdr
->check
;
/*
 * l4Offload: on the transmit path the Transmit.TcpChecksum flag is used;
 * otherwise it is set when either Receive TCP checksum flag is set.
 */
1521 l4Offload
= isTx
? (BOOLEAN
)csumInfo
.Transmit
.TcpChecksum
:
1522 ((BOOLEAN
)csumInfo
.Receive
.TcpChecksumSucceeded
||
1523 (BOOLEAN
)csumInfo
.Receive
.TcpChecksumFailed
);
1524 } else if (udpHdr
) {
1525 portField
= &udpHdr
->source
;
1526 checkField
= &udpHdr
->check
;
1527 l4Offload
= isTx
? (BOOLEAN
)csumInfo
.Transmit
.UdpChecksum
:
1528 ((BOOLEAN
)csumInfo
.Receive
.UdpChecksumSucceeded
||
1529 (BOOLEAN
)csumInfo
.Receive
.UdpChecksumFailed
);
/*
 * NOTE(review): no assignment to l4Offload is visible for the destination
 * fields below, unlike the source fields above; confirm this is intended.
 */
1532 addrField
= &ipHdr
->daddr
;
1534 portField
= &tcpHdr
->dest
;
1535 checkField
= &tcpHdr
->check
;
1536 } else if (udpHdr
) {
1537 portField
= &udpHdr
->dest
;
1538 checkField
= &udpHdr
->check
;
/* Address rewrite: only when the address actually changes. */
1541 if (*addrField
!= newAddr
) {
1542 UINT32 oldAddr
= *addrField
;
1543 if (checkField
&& *checkField
!= 0) {
1545 /* Recompute IP pseudo checksum */
1546 *checkField
= ~(*checkField
);
1547 *checkField
= ChecksumUpdate32(*checkField
, oldAddr
,
1549 *checkField
= ~(*checkField
);
1551 *checkField
= ChecksumUpdate32(*checkField
, oldAddr
,
1555 if (ipHdr
->check
!= 0) {
1556 ipHdr
->check
= ChecksumUpdate32(ipHdr
->check
, oldAddr
,
1559 *addrField
= newAddr
;
/* Port rewrite: the L4 checksum is only patched when l4Offload is not set. */
1562 if (portField
&& *portField
!= newPort
) {
1563 if (checkField
&& !l4Offload
) {
1564 *checkField
= ChecksumUpdate16(*checkField
, *portField
,
1567 *portField
= newPort
;
1569 PNET_BUFFER_LIST curNbl
= ovsFwdCtx
->curNbl
;
1570 PNET_BUFFER_LIST newNbl
= NULL
;
/* For TCP the MSS recorded on the NBL is read before segmenting. */
1571 if (layers
->isTcp
) {
1572 UINT32 mss
= OVSGetTcpMSS(curNbl
);
1574 OVS_LOG_TRACE("l4Offset %d", layers
->l4Offset
);
1575 newNbl
= OvsTcpSegmentNBL(ovsFwdCtx
->switchContext
, curNbl
, layers
,
1577 if (newNbl
== NULL
) {
1578 OVS_LOG_ERROR("Unable to segment NBL");
1579 return NDIS_STATUS_FAILURE
;
1581 /* Clear out LSO flags after this point */
1582 NET_BUFFER_LIST_INFO(newNbl
, TcpLargeSendNetBufferListInfo
) = 0;
1585 /* If we didn't split the packet above, make a copy now */
1586 if (newNbl
== NULL
) {
1587 csumInfo
.Value
= NET_BUFFER_LIST_INFO(curNbl
,
1588 TcpIpChecksumNetBufferListInfo
);
1589 OvsApplySWChecksumOnNB(layers
, curNbl
, &csumInfo
);
1594 OvsCompleteNBLForwardingCtx(ovsFwdCtx
,
1595 L
"Complete after cloning NBL for encapsulation");
/*
 * NOTE(review): the status returned by OvsInitForwardingCtx() is not checked
 * here, unlike the other callers in this file; confirm that a failure cannot
 * leave the context unusable.
 */
1596 OvsInitForwardingCtx(ovsFwdCtx
, ovsFwdCtx
->switchContext
,
1597 newNbl
, ovsFwdCtx
->srcVportNo
, 0,
1598 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl
),
1599 ovsFwdCtx
->completionList
,
1600 &ovsFwdCtx
->layers
, FALSE
);
1601 ovsFwdCtx
->curNbl
= newNbl
;
/* The checksum offload information on the NBL is cleared at the end. */
1604 NET_BUFFER_LIST_INFO(curNbl
,
1605 TcpIpChecksumNetBufferListInfo
) = 0;
1607 return NDIS_STATUS_SUCCESS
;
1611 *----------------------------------------------------------------------------
1612 * OvsUpdateIPv4Header --
1613 * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
1615 *----------------------------------------------------------------------------
1618 OvsUpdateIPv4Header(OvsForwardingContext
*ovsFwdCtx
,
1619 const struct ovs_key_ipv4
*ipAttr
)
1623 OVS_PACKET_HDR_INFO
*layers
= &ovsFwdCtx
->layers
;
1625 TCPHdr
*tcpHdr
= NULL
;
1626 UDPHdr
*udpHdr
= NULL
;
1628 ASSERT(layers
->value
!= 0);
1630 if (layers
->isTcp
|| layers
->isUdp
) {
1631 hdrSize
= layers
->l4Offset
+
1632 layers
->isTcp
? sizeof (*tcpHdr
) : sizeof (*udpHdr
);
1634 hdrSize
= layers
->l3Offset
+ sizeof (*ipHdr
);
1637 bufferStart
= OvsGetHeaderBySize(ovsFwdCtx
, hdrSize
);
1639 return NDIS_STATUS_RESOURCES
;
1642 ipHdr
= (IPHdr
*)(bufferStart
+ layers
->l3Offset
);
1644 if (layers
->isTcp
) {
1645 tcpHdr
= (TCPHdr
*)(bufferStart
+ layers
->l4Offset
);
1646 } else if (layers
->isUdp
) {
1647 udpHdr
= (UDPHdr
*)(bufferStart
+ layers
->l4Offset
);
1651 * Adjust the IP header inline as dictated by the action, and also update
1652 * the IP and the TCP checksum for the data modified.
1654 * In the future, this could be optimized to make one call to
1655 * ChecksumUpdate32(). Ignoring this for now, since for the most common
1656 * case, we only update the TTL.
1658 if (ipHdr
->saddr
!= ipAttr
->ipv4_src
) {
1660 tcpHdr
->check
= ChecksumUpdate32(tcpHdr
->check
, ipHdr
->saddr
,
1662 } else if (udpHdr
&& udpHdr
->check
) {
1663 udpHdr
->check
= ChecksumUpdate32(udpHdr
->check
, ipHdr
->saddr
,
1667 if (ipHdr
->check
!= 0) {
1668 ipHdr
->check
= ChecksumUpdate32(ipHdr
->check
, ipHdr
->saddr
,
1671 ipHdr
->saddr
= ipAttr
->ipv4_src
;
1673 if (ipHdr
->daddr
!= ipAttr
->ipv4_dst
) {
1675 tcpHdr
->check
= ChecksumUpdate32(tcpHdr
->check
, ipHdr
->daddr
,
1677 } else if (udpHdr
&& udpHdr
->check
) {
1678 udpHdr
->check
= ChecksumUpdate32(udpHdr
->check
, ipHdr
->daddr
,
1682 if (ipHdr
->check
!= 0) {
1683 ipHdr
->check
= ChecksumUpdate32(ipHdr
->check
, ipHdr
->daddr
,
1686 ipHdr
->daddr
= ipAttr
->ipv4_dst
;
1688 if (ipHdr
->protocol
!= ipAttr
->ipv4_proto
) {
1689 UINT16 oldProto
= (ipHdr
->protocol
<< 16) & 0xff00;
1690 UINT16 newProto
= (ipAttr
->ipv4_proto
<< 16) & 0xff00;
1692 tcpHdr
->check
= ChecksumUpdate16(tcpHdr
->check
, oldProto
, newProto
);
1693 } else if (udpHdr
&& udpHdr
->check
) {
1694 udpHdr
->check
= ChecksumUpdate16(udpHdr
->check
, oldProto
, newProto
);
1697 if (ipHdr
->check
!= 0) {
1698 ipHdr
->check
= ChecksumUpdate16(ipHdr
->check
, oldProto
, newProto
);
1700 ipHdr
->protocol
= ipAttr
->ipv4_proto
;
1702 if (ipHdr
->ttl
!= ipAttr
->ipv4_ttl
) {
1703 UINT16 oldTtl
= (ipHdr
->ttl
) & 0xff;
1704 UINT16 newTtl
= (ipAttr
->ipv4_ttl
) & 0xff;
1705 if (ipHdr
->check
!= 0) {
1706 ipHdr
->check
= ChecksumUpdate16(ipHdr
->check
, oldTtl
, newTtl
);
1708 ipHdr
->ttl
= ipAttr
->ipv4_ttl
;
1711 return NDIS_STATUS_SUCCESS
;
1715 * --------------------------------------------------------------------------
1716 * OvsExecuteSetAction --
1717 * Executes a set() action, but storing the actions into 'ovsFwdCtx'
1718 * --------------------------------------------------------------------------
1720 static __inline NDIS_STATUS
1721 OvsExecuteSetAction(OvsForwardingContext
*ovsFwdCtx
,
1726 enum ovs_key_attr type
= NlAttrType(a
);
1727 NDIS_STATUS status
= NDIS_STATUS_SUCCESS
;
1730 case OVS_KEY_ATTR_ETHERNET
:
1731 status
= OvsUpdateEthHeader(ovsFwdCtx
,
1732 NlAttrGetUnspec(a
, sizeof(struct ovs_key_ethernet
)));
1735 case OVS_KEY_ATTR_IPV4
:
1736 status
= OvsUpdateIPv4Header(ovsFwdCtx
,
1737 NlAttrGetUnspec(a
, sizeof(struct ovs_key_ipv4
)));
1740 case OVS_KEY_ATTR_TUNNEL
:
1742 OvsIPv4TunnelKey tunKey
;
1743 tunKey
.flow_hash
= (uint16
)(hash
? *hash
: OvsHashFlow(key
));
1744 tunKey
.dst_port
= key
->ipKey
.l4
.tpDst
;
1745 NTSTATUS convertStatus
= OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR
)a
, &tunKey
);
1746 status
= SUCCEEDED(convertStatus
) ? NDIS_STATUS_SUCCESS
: NDIS_STATUS_FAILURE
;
1747 ASSERT(status
== NDIS_STATUS_SUCCESS
);
1748 RtlCopyMemory(&ovsFwdCtx
->tunKey
, &tunKey
, sizeof ovsFwdCtx
->tunKey
);
1752 case OVS_KEY_ATTR_UDP
:
1753 status
= OvsUpdateUdpPorts(ovsFwdCtx
,
1754 NlAttrGetUnspec(a
, sizeof(struct ovs_key_udp
)));
1757 case OVS_KEY_ATTR_TCP
:
1758 status
= OvsUpdateTcpPorts(ovsFwdCtx
,
1759 NlAttrGetUnspec(a
, sizeof(struct ovs_key_tcp
)));
1763 OVS_LOG_INFO("Unhandled attribute %#x", type
);
1770 * --------------------------------------------------------------------------
1771 * OvsExecuteRecirc --
1772 * The function adds a deferred action to allow the current packet, nbl,
1773 * to re-enter datapath packet processing.
1774 * --------------------------------------------------------------------------
1777 OvsExecuteRecirc(OvsForwardingContext
*ovsFwdCtx
,
1779 const PNL_ATTR actions
,
1782 POVS_DEFERRED_ACTION deferredAction
= NULL
;
1783 PNET_BUFFER_LIST newNbl
= NULL
;
1785 if (!NlAttrIsLast(actions
, rem
)) {
1787 * Recirc action is the not the last action of the action list, so we
1788 * need to clone the packet.
1790 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
1791 0, 0, TRUE
/*copy NBL info*/);
1793 * Skip the recirc action when out of memory, but continue on with the
1794 * rest of the action list.
1796 if (newNbl
== NULL
) {
1797 ovsActionStats
.noCopiedNbl
++;
1798 return NDIS_STATUS_SUCCESS
;
1803 deferredAction
= OvsAddDeferredActions(newNbl
, key
, NULL
);
1805 deferredAction
= OvsAddDeferredActions(ovsFwdCtx
->curNbl
, key
, NULL
);
1808 if (deferredAction
) {
1809 deferredAction
->key
.recircId
= NlAttrGetU32(actions
);
1812 ovsActionStats
.deferredActionsQueueFull
++;
1813 OvsCompleteNBL(ovsFwdCtx
->switchContext
, newNbl
, TRUE
);
1817 return NDIS_STATUS_SUCCESS
;
/*
 * OvsExecuteHash: derives a 32-bit hash from 'key' with OvsHashFlow() and
 * mixes it with the 'hash_basis' taken from the action payload in 'attr'
 * using OvsJhashWords().  What is done with the result afterwards is not
 * visible in this excerpt.
 */
1821 * --------------------------------------------------------------------------
1823 * The function updates datapath hash read from userspace.
1824 * --------------------------------------------------------------------------
1827 OvsExecuteHash(OvsFlowKey
*key
,
1828 const PNL_ATTR attr
)
/* The attribute payload is interpreted as a struct ovs_action_hash. */
1830 struct ovs_action_hash
*hash_act
= NlAttrData(attr
);
1833 hash
= (UINT32
)OvsHashFlow(key
);
/* Re-hash the single 32-bit word, seeded with the action's basis. */
1834 hash
= OvsJhashWords(&hash
, 1, hash_act
->hash_basis
);
1842 * --------------------------------------------------------------------------
1843 * OvsOutputUserspaceAction --
1844 * This function sends the packet to userspace according to nested
1845 * %OVS_USERSPACE_ATTR_* attributes.
1846 * --------------------------------------------------------------------------
1848 static __inline NDIS_STATUS
1849 OvsOutputUserspaceAction(OvsForwardingContext
*ovsFwdCtx
,
1851 const PNL_ATTR attr
)
1853 NTSTATUS status
= NDIS_STATUS_SUCCESS
;
1854 PNL_ATTR userdataAttr
;
1856 POVS_PACKET_QUEUE_ELEM elem
;
1857 POVS_PACKET_HDR_INFO layers
= &ovsFwdCtx
->layers
;
1858 BOOLEAN isRecv
= FALSE
;
1860 POVS_VPORT_ENTRY vport
= OvsFindVportByPortNo(ovsFwdCtx
->switchContext
,
1861 ovsFwdCtx
->srcVportNo
);
1864 if (vport
->isExternal
||
1865 OvsIsTunnelVportType(vport
->ovsType
)) {
1870 queueAttr
= NlAttrFindNested(attr
, OVS_USERSPACE_ATTR_PID
);
1871 userdataAttr
= NlAttrFindNested(attr
, OVS_USERSPACE_ATTR_USERDATA
);
1873 elem
= OvsCreateQueueNlPacket(NlAttrData(userdataAttr
),
1874 NlAttrGetSize(userdataAttr
),
1875 OVS_PACKET_CMD_ACTION
,
1876 vport
, key
, ovsFwdCtx
->curNbl
,
1877 NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx
->curNbl
),
1881 LIST_ENTRY missedPackets
;
1882 InitializeListHead(&missedPackets
);
1883 InsertTailList(&missedPackets
, &elem
->link
);
1884 OvsQueuePackets(&missedPackets
, 1);
1886 status
= NDIS_STATUS_FAILURE
;
/*
 * OvsExecuteSampleAction: walks the nested %OVS_SAMPLE_ATTR_* attributes of
 * 'attr', evaluates the sampling probability and then runs the nested
 * action list.  A list that consists of a single userspace action is
 * executed inline; any other list is queued as deferred actions on a clone
 * of the current NBL.  Several short lines of this function are not visible
 * in this excerpt.
 */
1893 * --------------------------------------------------------------------------
1894 * OvsExecuteSampleAction --
1895 * Executes actions based on probability, as specified in the nested
1896 * %OVS_SAMPLE_ATTR_* attributes.
1897 * --------------------------------------------------------------------------
1899 static __inline NDIS_STATUS
1900 OvsExecuteSampleAction(OvsForwardingContext
*ovsFwdCtx
,
1902 const PNL_ATTR attr
)
1904 PNET_BUFFER_LIST newNbl
= NULL
;
1905 PNL_ATTR actionsList
= NULL
;
/* Iterate over the attributes nested inside the sample action. */
1910 NL_ATTR_FOR_EACH_UNSAFE(a
, rem
, NlAttrData(attr
), NlAttrGetSize(attr
)) {
1911 switch (NlAttrType(a
)) {
1912 case OVS_SAMPLE_ATTR_PROBABILITY
:
1914 UINT32 probability
= NlAttrGetU32(a
);
/*
 * Taken when the probability is zero or the random draw exceeds it; the
 * body of this branch is not visible in this excerpt.
 */
1916 if (!probability
|| Rand() > probability
) {
1921 case OVS_SAMPLE_ATTR_ACTIONS
:
/* From here on 'a'/'rem' describe the nested action list. */
1928 rem
= NlAttrGetSize(actionsList
);
1929 a
= (PNL_ATTR
)NlAttrData(actionsList
);
1933 /* Actions list is empty, do nothing */
1934 return STATUS_SUCCESS
;
1938 * The only known usage of sample action is having a single user-space
1939 * action. Treat this usage as a special case.
1941 if (NlAttrType(a
) == OVS_ACTION_ATTR_USERSPACE
&&
1942 NlAttrIsLast(a
, rem
)) {
1943 return OvsOutputUserspaceAction(ovsFwdCtx
, key
, a
);
/* General case: the nested actions run later on a clone of the packet. */
1946 newNbl
= OvsPartialCopyNBL(ovsFwdCtx
->switchContext
, ovsFwdCtx
->curNbl
,
1947 0, 0, TRUE
/*copy NBL info*/);
1948 if (newNbl
== NULL
) {
1950 * Skip the sample action when out of memory, but continue on with the
1951 * rest of the action list.
1953 ovsActionStats
.noCopiedNbl
++;
1954 return STATUS_SUCCESS
;
/* If the clone cannot be queued it is completed here; success is returned. */
1957 if (!OvsAddDeferredActions(newNbl
, key
, a
)) {
1959 "Deferred actions limit reached, dropping sample action.");
1960 OvsCompleteNBL(ovsFwdCtx
->switchContext
, newNbl
, TRUE
);
1963 return STATUS_SUCCESS
;
1967 * --------------------------------------------------------------------------
1968 * OvsDoExecuteActions --
1969 * Interpret and execute the specified 'actions' on the specified packet
1970 * 'curNbl'. The expectation is that if the packet needs to be dropped
1971 * (completed) for some reason, it is added to 'completionList' so that the
1972 * caller can complete the packet. If 'completionList' is NULL, the NBL is
1973 * assumed to be generated by OVS and freed up. Otherwise, the function
1974 * consumes the NBL by generating a NDIS send indication for the packet.
1976 * There are one or more of "clone" NBLs that may get generated while
1977 * executing the actions. Upon any failures, the "cloned" NBLs are freed up,
1978 * and the caller does not have to worry about them.
1980 * Success or failure is returned based on whether the specified actions
1981 * were executed successfully on the packet or not.
1982 * --------------------------------------------------------------------------
1985 OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext
,
1986 OvsCompletionList
*completionList
,
1987 PNET_BUFFER_LIST curNbl
,
1992 OVS_PACKET_HDR_INFO
*layers
,
1993 const PNL_ATTR actions
,
1999 OvsForwardingContext ovsFwdCtx
;
2000 PCWSTR dropReason
= L
"";
2002 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail
=
2003 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl
);
2005 /* XXX: ASSERT that the flow table lock is held. */
2006 status
= OvsInitForwardingCtx(&ovsFwdCtx
, switchContext
, curNbl
, portNo
,
2007 sendFlags
, fwdDetail
, completionList
,
2009 if (status
!= NDIS_STATUS_SUCCESS
) {
2010 dropReason
= L
"OVS-initing destination port list failed";
2014 if (actionsLen
== 0) {
2015 dropReason
= L
"OVS-Dropped due to Flow action";
2016 ovsActionStats
.zeroActionLen
++;
2020 NL_ATTR_FOR_EACH_UNSAFE (a
, rem
, actions
, actionsLen
) {
2021 switch(NlAttrType(a
)) {
2022 case OVS_ACTION_ATTR_OUTPUT
:
2023 dstPortID
= NlAttrGetU32(a
);
2024 status
= OvsAddPorts(&ovsFwdCtx
, key
, dstPortID
,
2026 if (status
!= NDIS_STATUS_SUCCESS
) {
2027 dropReason
= L
"OVS-adding destination port failed";
2032 case OVS_ACTION_ATTR_PUSH_VLAN
:
2034 struct ovs_action_push_vlan
*vlan
;
2036 PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag
;
2038 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
2039 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
2040 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
2041 if (status
!= NDIS_STATUS_SUCCESS
) {
2042 dropReason
= L
"OVS-adding destination failed";
2047 vlanTagValue
= NET_BUFFER_LIST_INFO(ovsFwdCtx
.curNbl
,
2048 Ieee8021QNetBufferListInfo
);
2049 if (vlanTagValue
!= NULL
) {
2051 * XXX: We don't support double VLAN tag offload. In such cases,
2052 * we need to insert the existing one into the packet buffer,
2053 * and add the new one as offload. This will take care of
2054 * guest tag-in-tag case as well as OVS rules that specify
2059 vlanTag
= (PNDIS_NET_BUFFER_LIST_8021Q_INFO
)(PVOID
*)&vlanTagValue
;
2060 vlan
= (struct ovs_action_push_vlan
*)NlAttrGet((const PNL_ATTR
)a
);
2061 vlanTag
->TagHeader
.VlanId
= ntohs(vlan
->vlan_tci
) & 0xfff;
2062 vlanTag
->TagHeader
.UserPriority
= ntohs(vlan
->vlan_tci
) >> 13;
2064 NET_BUFFER_LIST_INFO(ovsFwdCtx
.curNbl
,
2065 Ieee8021QNetBufferListInfo
) = vlanTagValue
;
2070 case OVS_ACTION_ATTR_POP_VLAN
:
2072 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
2073 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
2074 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
2075 if (status
!= NDIS_STATUS_SUCCESS
) {
2076 dropReason
= L
"OVS-adding destination failed";
2081 if (NET_BUFFER_LIST_INFO(ovsFwdCtx
.curNbl
,
2082 Ieee8021QNetBufferListInfo
) != 0) {
2083 NET_BUFFER_LIST_INFO(ovsFwdCtx
.curNbl
,
2084 Ieee8021QNetBufferListInfo
) = 0;
2087 * The VLAN tag is inserted into the packet buffer. Pop the tag
2088 * by packet buffer modification.
2090 status
= OvsPopVlanInPktBuf(&ovsFwdCtx
);
2091 if (status
!= NDIS_STATUS_SUCCESS
) {
2092 dropReason
= L
"OVS-pop vlan action failed";
2099 case OVS_ACTION_ATTR_PUSH_MPLS
:
2101 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
2102 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
2103 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
2104 if (status
!= NDIS_STATUS_SUCCESS
) {
2105 dropReason
= L
"OVS-adding destination failed";
2110 status
= OvsActionMplsPush(&ovsFwdCtx
,
2111 (struct ovs_action_push_mpls
*)NlAttrGet
2112 ((const PNL_ATTR
)a
));
2113 if (status
!= NDIS_STATUS_SUCCESS
) {
2114 dropReason
= L
"OVS-push MPLS action failed";
2117 layers
->l3Offset
+= MPLS_HLEN
;
2118 layers
->l4Offset
+= MPLS_HLEN
;
2122 case OVS_ACTION_ATTR_POP_MPLS
:
2124 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
2125 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
2126 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
2127 if (status
!= NDIS_STATUS_SUCCESS
) {
2128 dropReason
= L
"OVS-adding destination failed";
2133 status
= OvsActionMplsPop(&ovsFwdCtx
, NlAttrGetBe16(a
));
2134 if (status
!= NDIS_STATUS_SUCCESS
) {
2135 dropReason
= L
"OVS-pop MPLS action failed";
2138 layers
->l3Offset
-= MPLS_HLEN
;
2139 layers
->l4Offset
-= MPLS_HLEN
;
2143 case OVS_ACTION_ATTR_HASH
:
2145 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
2146 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
2147 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
2148 if (status
!= NDIS_STATUS_SUCCESS
) {
2149 dropReason
= L
"OVS-adding destination failed";
2154 OvsExecuteHash(key
, (const PNL_ATTR
)a
);
2159 case OVS_ACTION_ATTR_CT
:
2161 if (ovsFwdCtx
.destPortsSizeOut
> 0
2162 || ovsFwdCtx
.tunnelTxNic
!= NULL
2163 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
2164 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
2165 if (status
!= NDIS_STATUS_SUCCESS
) {
2166 dropReason
= L
"OVS-adding destination failed";
2171 PNET_BUFFER_LIST oldNbl
= ovsFwdCtx
.curNbl
;
2172 status
= OvsExecuteConntrackAction(&ovsFwdCtx
, key
,
2174 if (status
!= NDIS_STATUS_SUCCESS
) {
2175 /* Pending NBLs are consumed by Defragmentation. */
2176 if (status
!= NDIS_STATUS_PENDING
) {
2177 OVS_LOG_ERROR("CT Action failed status = %lu", status
);
2178 dropReason
= L
"OVS-conntrack action failed";
2180 /* We added a new pending NBL to be consumed later.
2181 * Report to the userspace that the action applied
2183 status
= NDIS_STATUS_SUCCESS
;
2186 } else if (oldNbl
!= ovsFwdCtx
.curNbl
) {
2188 * OvsIpv4Reassemble consumes the original NBL and creates a
2189 * new one and assigns it to the curNbl of ovsFwdCtx.
2191 OvsInitForwardingCtx(&ovsFwdCtx
,
2192 ovsFwdCtx
.switchContext
,
2194 ovsFwdCtx
.srcVportNo
,
2195 ovsFwdCtx
.sendFlags
,
2196 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(ovsFwdCtx
.curNbl
),
2197 ovsFwdCtx
.completionList
,
2198 &ovsFwdCtx
.layers
, FALSE
);
2199 key
->ipKey
.nwFrag
= OVS_FRAG_TYPE_NONE
;
2204 case OVS_ACTION_ATTR_RECIRC
:
2206 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
2207 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
2208 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
2209 if (status
!= NDIS_STATUS_SUCCESS
) {
2210 dropReason
= L
"OVS-adding destination failed";
2215 status
= OvsExecuteRecirc(&ovsFwdCtx
, key
, (const PNL_ATTR
)a
, rem
);
2216 if (status
!= NDIS_STATUS_SUCCESS
) {
2217 dropReason
= L
"OVS-recirculation action failed";
2221 if (NlAttrIsLast(a
, rem
)) {
2227 case OVS_ACTION_ATTR_USERSPACE
:
2229 status
= OvsOutputUserspaceAction(&ovsFwdCtx
, key
,
2231 if (status
!= NDIS_STATUS_SUCCESS
) {
2232 dropReason
= L
"OVS-Dropped due to failure to queue to "
2236 dropReason
= L
"OVS-Completed since packet was copied to "
2240 case OVS_ACTION_ATTR_SET
:
2242 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
2243 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
2244 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
2245 if (status
!= NDIS_STATUS_SUCCESS
) {
2246 dropReason
= L
"OVS-adding destination failed";
2251 status
= OvsExecuteSetAction(&ovsFwdCtx
, key
, hash
,
2252 (const PNL_ATTR
)NlAttrGet
2253 ((const PNL_ATTR
)a
));
2254 if (status
!= NDIS_STATUS_SUCCESS
) {
2255 dropReason
= L
"OVS-set action failed";
2260 case OVS_ACTION_ATTR_SAMPLE
:
2262 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
2263 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
2264 status
= OvsOutputBeforeSetAction(&ovsFwdCtx
);
2265 if (status
!= NDIS_STATUS_SUCCESS
) {
2266 dropReason
= L
"OVS-adding destination failed";
2271 status
= OvsExecuteSampleAction(&ovsFwdCtx
, key
,
2273 if (status
!= NDIS_STATUS_SUCCESS
) {
2274 dropReason
= L
"OVS-sample action failed";
2280 status
= NDIS_STATUS_NOT_SUPPORTED
;
2285 if (ovsFwdCtx
.destPortsSizeOut
> 0 || ovsFwdCtx
.tunnelTxNic
!= NULL
2286 || ovsFwdCtx
.tunnelRxNic
!= NULL
) {
2287 status
= OvsOutputForwardingCtx(&ovsFwdCtx
);
2288 ASSERT(ovsFwdCtx
.curNbl
== NULL
);
2291 ASSERT(ovsFwdCtx
.destPortsSizeOut
== 0);
2292 ASSERT(ovsFwdCtx
.tunnelRxNic
== NULL
);
2293 ASSERT(ovsFwdCtx
.tunnelTxNic
== NULL
);
2297 * If curNbl != NULL, it implies the NBL has not been freed up so far.
2299 if (ovsFwdCtx
.curNbl
) {
2300 OvsCompleteNBLForwardingCtx(&ovsFwdCtx
, dropReason
);
2308 * --------------------------------------------------------------------------
2309 * OvsActionsExecute --
2310 * The function interprets and executes the specified 'actions' on the
2311 * specified packet 'curNbl'. See 'OvsDoExecuteActions' description for
2314 * Also executes deferred actions added by recirculation or sample
2316 * --------------------------------------------------------------------------
2319 OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext
,
2320 OvsCompletionList
*completionList
,
2321 PNET_BUFFER_LIST curNbl
,
2326 OVS_PACKET_HDR_INFO
*layers
,
2327 const PNL_ATTR actions
,
/*
 * First pass: hand the packet and the 'actions' list ('actionsLen' bytes)
 * to OvsDoExecuteActions, forwarding the caller's arguments unchanged.
 */
2332 status
= OvsDoExecuteActions(switchContext
, completionList
, curNbl
,
2333 portNo
, sendFlags
, key
, hash
, layers
,
2334 actions
, actionsLen
);
/*
 * Second pass: deferred actions are processed only when the call above
 * reported success; otherwise 'status' from the first pass is left as is.
 *
 * NOTE(review): this check compares against STATUS_SUCCESS, while the other
 * status checks visible in this file compare against NDIS_STATUS_SUCCESS --
 * confirm the two are interchangeable here and consider unifying them.
 */
2336 if (status
== STATUS_SUCCESS
) {
2337 status
= OvsProcessDeferredActions(switchContext
, completionList
,
2338 portNo
, sendFlags
, layers
);
2345 * --------------------------------------------------------------------------
2347 * The function processes the packet 'curNbl' that re-entered datapath
2348 * packet processing after a recirculation action.
2349 * --------------------------------------------------------------------------
2352 OvsDoRecirc(POVS_SWITCH_CONTEXT switchContext
,
2353 OvsCompletionList
*completionList
,
2354 PNET_BUFFER_LIST curNbl
,
2357 OVS_PACKET_HDR_INFO
*layers
)
2361 OvsForwardingContext ovsFwdCtx
= { 0 };
2365 OvsInitForwardingCtx(&ovsFwdCtx
, switchContext
, curNbl
,
2367 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl
),
2368 completionList
, layers
, TRUE
);
2369 ASSERT(ovsFwdCtx
.switchContext
);
2371 flow
= OvsLookupFlow(&ovsFwdCtx
.switchContext
->datapath
, key
, &hash
, FALSE
);
2373 UINT32 level
= OvsDeferredActionsLevelGet();
2375 if (level
> DEFERRED_ACTION_EXEC_LEVEL
) {
2376 OvsCompleteNBLForwardingCtx(&ovsFwdCtx
,
2377 L
"OVS-Dropped due to deferred actions execution level limit \
2379 ovsActionStats
.deferredActionsExecLimit
++;
2380 ovsFwdCtx
.curNbl
= NULL
;
2381 return NDIS_STATUS_FAILURE
;
2384 OvsFlowUsed(flow
, ovsFwdCtx
.curNbl
, &ovsFwdCtx
.layers
);
2385 ovsFwdCtx
.switchContext
->datapath
.hits
++;
2387 OvsDeferredActionsLevelInc();
2389 status
= OvsDoExecuteActions(ovsFwdCtx
.switchContext
,
2390 ovsFwdCtx
.completionList
,
2392 ovsFwdCtx
.srcVportNo
,
2393 ovsFwdCtx
.sendFlags
,
2394 key
, &hash
, &ovsFwdCtx
.layers
,
2395 flow
->actions
, flow
->actionsLen
);
2396 ovsFwdCtx
.curNbl
= NULL
;
2398 OvsDeferredActionsLevelDec();
2400 POVS_VPORT_ENTRY vport
= NULL
;
2401 LIST_ENTRY missedPackets
;
2404 ovsFwdCtx
.switchContext
->datapath
.misses
++;
2405 InitializeListHead(&missedPackets
);
2406 vport
= OvsFindVportByPortNo(switchContext
, srcPortNo
);
2407 if (vport
== NULL
|| vport
->ovsState
!= OVS_STATE_CONNECTED
) {
2408 OvsCompleteNBLForwardingCtx(&ovsFwdCtx
,
2409 L
"OVS-Dropped due to port removal");
2410 ovsActionStats
.noVport
++;
2411 return NDIS_STATUS_SUCCESS
;
2413 status
= OvsCreateAndAddPackets(NULL
, 0, OVS_PACKET_CMD_MISS
,
2414 vport
, key
, ovsFwdCtx
.curNbl
,
2415 OvsIsExternalVportByPortId(switchContext
,
2418 ovsFwdCtx
.switchContext
,
2419 &missedPackets
, &num
);
2421 OvsQueuePackets(&missedPackets
, num
);
2423 if (status
== NDIS_STATUS_SUCCESS
) {
2424 /* Complete the packet since it was copied to user buffer. */
2425 OvsCompleteNBLForwardingCtx(&ovsFwdCtx
,
2426 L
"OVS-Dropped since packet was copied to userspace");
2427 ovsActionStats
.flowMiss
++;
2429 OvsCompleteNBLForwardingCtx(&ovsFwdCtx
,
2430 L
"OVS-Dropped due to failure to queue to userspace");
2431 ovsActionStats
.failedFlowMiss
++;
2432 status
= NDIS_STATUS_FAILURE
;