]> git.proxmox.com Git - mirror_ovs.git/blame - datapath-windows/ovsext/Actions.c
datapath-windows: Add Geneve support
[mirror_ovs.git] / datapath-windows / ovsext / Actions.c
CommitLineData
c803536e 1/*
7b383a56 2 * Copyright (c) 2014, 2016 VMware, Inc.
c803536e
SS
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "precomp.h"
18
ee25964a 19#include "Actions.h"
792d377d 20#include "Conntrack.h"
7b383a56 21#include "Debug.h"
fa1324c9 22#include "Event.h"
fa1324c9 23#include "Flow.h"
85571a3d 24#include "Gre.h"
245eedef 25#include "Jhash.h"
5874d571 26#include "Mpls.h"
85571a3d 27#include "NetProto.h"
7b383a56 28#include "Offload.h"
fa1324c9 29#include "PacketIO.h"
ee25964a 30#include "Recirc.h"
85571a3d
AS
31#include "Stt.h"
32#include "Switch.h"
33#include "User.h"
34#include "Vport.h"
35#include "Vxlan.h"
47c3123d 36#include "Geneve.h"
c803536e 37
c803536e
SS
38#ifdef OVS_DBG_MOD
39#undef OVS_DBG_MOD
40#endif
41#define OVS_DBG_MOD OVS_DBG_ACTION
c803536e 42
ee25964a
SV
43#define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
44
c803536e 45typedef struct _OVS_ACTION_STATS {
85571a3d
AS
46 UINT64 rxGre;
47 UINT64 txGre;
c803536e
SS
48 UINT64 rxVxlan;
49 UINT64 txVxlan;
022c2040
EE
50 UINT64 rxStt;
51 UINT64 txStt;
47c3123d
YL
52 UINT64 rxGeneve;
53 UINT64 txGeneve;
c803536e
SS
54 UINT64 flowMiss;
55 UINT64 flowUserspace;
56 UINT64 txTcp;
57 UINT32 failedFlowMiss;
58 UINT32 noVport;
59 UINT32 failedFlowExtract;
60 UINT32 noResource;
61 UINT32 noCopiedNbl;
62 UINT32 failedEncap;
63 UINT32 failedDecap;
64 UINT32 cannotGrowDest;
65 UINT32 zeroActionLen;
66 UINT32 failedChecksum;
ee25964a
SV
67 UINT32 deferredActionsQueueFull;
68 UINT32 deferredActionsExecLimit;
c803536e
SS
69} OVS_ACTION_STATS, *POVS_ACTION_STATS;
70
71OVS_ACTION_STATS ovsActionStats;
72
73/*
74 * There a lot of data that needs to be maintained while executing the pipeline
75 * as dictated by the actions of a flow, across different functions at different
76 * levels. Such data is put together in a 'context' structure. Care should be
77 * exercised while adding new members to the structure - only add ones that get
78 * used across multiple stages in the pipeline/get used in multiple functions.
79 */
c803536e
SS
80typedef struct OvsForwardingContext {
81 POVS_SWITCH_CONTEXT switchContext;
82 /* The NBL currently used in the pipeline. */
83 PNET_BUFFER_LIST curNbl;
84 /* NDIS forwarding detail for 'curNbl'. */
85 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
86 /* Array of destination ports for 'curNbl'. */
87 PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
88 /* send flags while sending 'curNbl' into NDIS. */
89 ULONG sendFlags;
90 /* Total number of output ports, used + unused, in 'curNbl'. */
91 UINT32 destPortsSizeIn;
92 /* Total number of used output ports in 'curNbl'. */
93 UINT32 destPortsSizeOut;
94 /*
95 * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
96 * be freed/completed.
97 */
98 OvsCompletionList *completionList;
99 /*
100 * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
101 * bridge. ie. during tunneling on the Rx side.
102 */
103 UINT32 srcVportNo;
104
105 /*
106 * Tunnel key:
107 * - specified in actions during tunneling Tx
108 * - extracted from an NBL during tunneling Rx
109 */
110 OvsIPv4TunnelKey tunKey;
111
ee25964a 112 /*
c803536e
SS
113 * Tunneling - Tx:
114 * To store the output port, when it is a tunneled port. We don't foresee
115 * multiple tunneled ports as outport for any given NBL.
116 */
117 POVS_VPORT_ENTRY tunnelTxNic;
118
119 /*
120 * Tunneling - Rx:
121 * Points to the Internal port on the PIF Bridge, if the packet needs to be
122 * de-tunneled.
123 */
124 POVS_VPORT_ENTRY tunnelRxNic;
125
126 /* header information */
127 OVS_PACKET_HDR_INFO layers;
128} OvsForwardingContext;
129
c803536e
SS
130/*
131 * --------------------------------------------------------------------------
132 * OvsInitForwardingCtx --
133 * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
134 * is being executed.
135 *
136 * Result:
137 * NDIS_STATUS_SUCCESS on success
138 * Other NDIS_STATUS upon failure. Upon failure, it is safe to call
139 * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
140 * enough for OvsCompleteNBLForwardingCtx() to do its work.
141 * --------------------------------------------------------------------------
142 */
143static __inline NDIS_STATUS
144OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx,
145 POVS_SWITCH_CONTEXT switchContext,
146 PNET_BUFFER_LIST curNbl,
147 UINT32 srcVportNo,
148 ULONG sendFlags,
149 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail,
150 OvsCompletionList *completionList,
151 OVS_PACKET_HDR_INFO *layers,
152 BOOLEAN resetTunnelInfo)
153{
154 ASSERT(ovsFwdCtx);
155 ASSERT(switchContext);
156 ASSERT(curNbl);
157 ASSERT(fwdDetail);
158
159 /*
160 * Set values for curNbl and switchContext so upon failures, we have enough
161 * information to do cleanup.
162 */
163 ovsFwdCtx->curNbl = curNbl;
164 ovsFwdCtx->switchContext = switchContext;
165 ovsFwdCtx->completionList = completionList;
166 ovsFwdCtx->fwdDetail = fwdDetail;
167
168 if (fwdDetail->NumAvailableDestinations > 0) {
169 /*
170 * XXX: even though MSDN says GetNetBufferListDestinations() returns
171 * NDIS_STATUS, the header files say otherwise.
172 */
173 switchContext->NdisSwitchHandlers.GetNetBufferListDestinations(
174 switchContext->NdisSwitchContext, curNbl,
175 &ovsFwdCtx->destinationPorts);
176
177 ASSERT(ovsFwdCtx->destinationPorts);
178 /* Ensure that none of the elements are consumed yet. */
179 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
180 fwdDetail->NumAvailableDestinations);
181 } else {
182 ovsFwdCtx->destinationPorts = NULL;
183 }
184 ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations;
185 ovsFwdCtx->destPortsSizeOut = 0;
186 ovsFwdCtx->srcVportNo = srcVportNo;
187 ovsFwdCtx->sendFlags = sendFlags;
188 if (layers) {
189 ovsFwdCtx->layers = *layers;
190 } else {
191 RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers);
192 }
193 if (resetTunnelInfo) {
194 ovsFwdCtx->tunnelTxNic = NULL;
195 ovsFwdCtx->tunnelRxNic = NULL;
196 RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey);
197 }
198
199 return NDIS_STATUS_SUCCESS;
200}
201
202/*
203 * --------------------------------------------------------------------------
204 * OvsDetectTunnelRxPkt --
205 * Utility function for an RX packet to detect its tunnel type.
206 *
207 * Result:
208 * True - if the tunnel type was detected.
209 * False - if not a tunnel packet or tunnel type not supported.
210 * --------------------------------------------------------------------------
211 */
212static __inline BOOLEAN
213OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
214 const OvsFlowKey *flowKey)
215{
216 POVS_VPORT_ENTRY tunnelVport = NULL;
217
218 /* XXX: we should also check for the length of the UDP payload to pick
219 * packets only if they are at least VXLAN header size.
220 */
885b8265
NR
221
222 /*
223 * For some of the tunnel types such as GRE, the dstPort is not applicable
224 * since GRE does not have a L4 port. We use '0' for convenience.
225 */
85571a3d 226 if (!flowKey->ipKey.nwFrag) {
022c2040 227 UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst);
885b8265
NR
228
229 ASSERT(flowKey->ipKey.nwProto != IPPROTO_GRE || dstPort == 0);
230
231 tunnelVport =
232 OvsFindTunnelVportByDstPortAndNWProto(ovsFwdCtx->switchContext,
233 dstPort,
234 flowKey->ipKey.nwProto);
235 if (tunnelVport) {
236 switch(tunnelVport->ovsType) {
237 case OVS_VPORT_TYPE_STT:
85571a3d 238 ovsActionStats.rxStt++;
885b8265
NR
239 break;
240 case OVS_VPORT_TYPE_VXLAN:
85571a3d 241 ovsActionStats.rxVxlan++;
885b8265 242 break;
47c3123d
YL
243 case OVS_VPORT_TYPE_GENEVE:
244 ovsActionStats.rxGeneve++;
245 break;
885b8265
NR
246 case OVS_VPORT_TYPE_GRE:
247 ovsActionStats.rxGre++;
248 break;
85571a3d 249 }
022c2040 250 }
c803536e
SS
251 }
252
253 // We might get tunnel packets even before the tunnel gets initialized.
254 if (tunnelVport) {
255 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
256 ovsFwdCtx->tunnelRxNic = tunnelVport;
257 return TRUE;
258 }
259
260 return FALSE;
261}
262
263/*
264 * --------------------------------------------------------------------------
265 * OvsDetectTunnelPkt --
b2d9d3e8
NR
266 * Utility function to detect if a packet is to be subjected to
267 * tunneling (Tx) or de-tunneling (Rx). Various factors such as source
268 * port, destination port, packet contents, and previously setup tunnel
269 * context are used.
c803536e
SS
270 *
271 * Result:
b2d9d3e8
NR
272 * True - If the packet is to be subjected to tunneling.
273 * In case of invalid tunnel context, the tunneling functionality is
274 * a no-op and is completed within this function itself by consuming
275 * all of the tunneling context.
276 * False - If not a tunnel packet or tunnel type not supported. Caller should
277 * process the packet as a non-tunnel packet.
c803536e
SS
278 * --------------------------------------------------------------------------
279 */
280static __inline BOOLEAN
281OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
282 const POVS_VPORT_ENTRY dstVport,
283 const OvsFlowKey *flowKey)
284{
c803536e 285 if (OvsIsInternalVportType(dstVport->ovsType)) {
b2d9d3e8
NR
286 /*
287 * Rx:
288 * The source of NBL during tunneling Rx could be the external
289 * port or if it is being executed from userspace, the source port is
290 * default port.
291 */
7434992b
NR
292 BOOLEAN validSrcPort =
293 (ovsFwdCtx->fwdDetail->SourcePortId ==
294 ovsFwdCtx->switchContext->virtualExternalPortId) ||
295 (ovsFwdCtx->fwdDetail->SourcePortId ==
296 NDIS_SWITCH_DEFAULT_PORT_ID);
c803536e
SS
297
298 if (validSrcPort && OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) {
299 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
300 ASSERT(ovsFwdCtx->tunnelRxNic != NULL);
301 return TRUE;
302 }
303 } else if (OvsIsTunnelVportType(dstVport->ovsType)) {
304 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
305 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
b2d9d3e8
NR
306
307 /*
308 * Tx:
309 * The destination port is a tunnel port. Encapsulation must be
30bc8153
NR
310 * performed only on packets that originate from:
311 * - a VIF port
312 * - a bridge-internal port (packets generated from userspace)
313 * - no port.
b2d9d3e8
NR
314 *
315 * If the packet will not be encapsulated, consume the tunnel context
316 * by clearing it.
317 */
12e888ba 318 if (ovsFwdCtx->srcVportNo != OVS_DPPORT_NUMBER_INVALID) {
429d4556
AS
319
320 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(
321 ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
322
30bc8153
NR
323 if (!vport ||
324 (vport->ovsType != OVS_VPORT_TYPE_NETDEV &&
325 !OvsIsBridgeInternalVport(vport))) {
429d4556
AS
326 ovsFwdCtx->tunKey.dst = 0;
327 }
b2d9d3e8
NR
328 }
329
330 /* Tunnel the packet only if tunnel context is set. */
331 if (ovsFwdCtx->tunKey.dst != 0) {
022c2040 332 switch(dstVport->ovsType) {
85571a3d
AS
333 case OVS_VPORT_TYPE_GRE:
334 ovsActionStats.txGre++;
335 break;
022c2040
EE
336 case OVS_VPORT_TYPE_VXLAN:
337 ovsActionStats.txVxlan++;
338 break;
339 case OVS_VPORT_TYPE_STT:
340 ovsActionStats.txStt++;
341 break;
47c3123d
YL
342 case OVS_VPORT_TYPE_GENEVE:
343 ovsActionStats.txGeneve++;
344 break;
022c2040 345 }
b2d9d3e8
NR
346 ovsFwdCtx->tunnelTxNic = dstVport;
347 }
348
c803536e
SS
349 return TRUE;
350 }
351
352 return FALSE;
353}
354
355
356/*
357 * --------------------------------------------------------------------------
358 * OvsAddPorts --
359 * Add the specified destination vport into the forwarding context. If the
360 * vport is a VIF/external port, it is added directly to the NBL. If it is
361 * a tunneling port, it is NOT added to the NBL.
362 *
363 * Result:
364 * NDIS_STATUS_SUCCESS on success
365 * Other NDIS_STATUS upon failure.
366 * --------------------------------------------------------------------------
367 */
368static __inline NDIS_STATUS
369OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
370 OvsFlowKey *flowKey,
371 NDIS_SWITCH_PORT_ID dstPortId,
372 BOOLEAN preserveVLAN,
373 BOOLEAN preservePriority)
374{
375 POVS_VPORT_ENTRY vport;
376 PNDIS_SWITCH_PORT_DESTINATION fwdPort;
377 NDIS_STATUS status;
378 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
379
380 /*
381 * We hold the dispatch lock that protects the list of vports, so vports
382 * validated here can be added as destinations safely before we call into
383 * NDIS.
384 *
385 * Some of the vports can be tunnelled ports as well in which case
386 * they should be added to a separate list of tunnelled destination ports
387 * instead of the VIF ports. The context for the tunnel is settable
388 * in OvsForwardingContext.
389 */
390 vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId);
391 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
392 /*
393 * There may be some latency between a port disappearing, and userspace
394 * updating the recalculated flows. In the meantime, handle invalid
395 * ports gracefully.
396 */
397 ovsActionStats.noVport++;
398 return NDIS_STATUS_SUCCESS;
399 }
400 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
401 vport->stats.txPackets++;
402 vport->stats.txBytes +=
403 NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
404
7434992b
NR
405 if (OvsIsBridgeInternalVport(vport)) {
406 return NDIS_STATUS_SUCCESS;
407 }
408
c803536e 409 if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
c803536e
SS
410 return NDIS_STATUS_SUCCESS;
411 }
412
413 if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) {
414 if (ovsFwdCtx->destPortsSizeIn == 0) {
415 ASSERT(ovsFwdCtx->destinationPorts == NULL);
416 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
417 status =
418 switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
419 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
420 OVS_DEST_PORTS_ARRAY_MIN_SIZE,
421 &ovsFwdCtx->destinationPorts);
422 if (status != NDIS_STATUS_SUCCESS) {
423 ovsActionStats.cannotGrowDest++;
424 return status;
425 }
426 ovsFwdCtx->destPortsSizeIn =
427 ovsFwdCtx->fwdDetail->NumAvailableDestinations;
428 ASSERT(ovsFwdCtx->destinationPorts);
429 } else {
430 ASSERT(ovsFwdCtx->destinationPorts != NULL);
431 /*
432 * NumElements:
433 * A ULONG value that specifies the total number of
434 * NDIS_SWITCH_PORT_DESTINATION elements in the
435 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
436 *
437 * NumDestinations:
438 * A ULONG value that specifies the number of
439 * NDIS_SWITCH_PORT_DESTINATION elements in the
440 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
441 * specify port destinations.
442 *
443 * NumAvailableDestinations:
444 * A value that specifies the number of unused extensible switch
445 * destination ports elements within an NET_BUFFER_LIST structure.
446 */
447 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
448 ovsFwdCtx->destPortsSizeIn);
449 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
450 ovsFwdCtx->destPortsSizeOut -
451 ovsFwdCtx->fwdDetail->NumAvailableDestinations);
452 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0);
453 /*
454 * Before we grow the array of destination ports, the current set
455 * of ports needs to be committed. Only the ports added since the
456 * last commit need to be part of the new update.
457 */
458 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
459 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
460 ovsFwdCtx->fwdDetail->NumAvailableDestinations,
461 ovsFwdCtx->destinationPorts);
462 if (status != NDIS_STATUS_SUCCESS) {
463 ovsActionStats.cannotGrowDest++;
464 return status;
465 }
466 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
467 ovsFwdCtx->destPortsSizeIn);
468 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
469 ovsFwdCtx->destPortsSizeOut);
470 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
471
472 status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
473 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
474 ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts);
475 if (status != NDIS_STATUS_SUCCESS) {
476 ovsActionStats.cannotGrowDest++;
477 return status;
478 }
479 ASSERT(ovsFwdCtx->destinationPorts != NULL);
480 ovsFwdCtx->destPortsSizeIn <<= 1;
481 }
482 }
483
484 ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
485 fwdPort =
486 NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
487 ovsFwdCtx->destPortsSizeOut);
488
489 fwdPort->PortId = vport->portId;
490 fwdPort->NicIndex = vport->nicIndex;
491 fwdPort->IsExcluded = 0;
492 fwdPort->PreserveVLAN = preserveVLAN;
493 fwdPort->PreservePriority = preservePriority;
494 ovsFwdCtx->destPortsSizeOut += 1;
495
496 return NDIS_STATUS_SUCCESS;
497}
498
499
500/*
501 * --------------------------------------------------------------------------
502 * OvsClearTunTxCtx --
503 * Utility function to clear tx tunneling context.
504 * --------------------------------------------------------------------------
505 */
506static __inline VOID
507OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx)
508{
509 ovsFwdCtx->tunnelTxNic = NULL;
510 ovsFwdCtx->tunKey.dst = 0;
511}
512
513
514/*
515 * --------------------------------------------------------------------------
516 * OvsClearTunRxCtx --
517 * Utility function to clear rx tunneling context.
518 * --------------------------------------------------------------------------
519 */
520static __inline VOID
521OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx)
522{
523 ovsFwdCtx->tunnelRxNic = NULL;
524 ovsFwdCtx->tunKey.dst = 0;
525}
526
527
528/*
529 * --------------------------------------------------------------------------
530 * OvsCompleteNBLForwardingCtx --
531 * This utility function is responsible for freeing/completing an NBL - either
532 * by adding it to a completion list or by freeing it.
533 *
534 * Side effects:
535 * It also resets the necessary fields in 'ovsFwdCtx'.
536 * --------------------------------------------------------------------------
537 */
538static __inline VOID
539OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx,
540 PCWSTR dropReason)
541{
542 NDIS_STRING filterReason;
543
544 RtlInitUnicodeString(&filterReason, dropReason);
545 if (ovsFwdCtx->completionList) {
546 OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE,
547 ovsFwdCtx->fwdDetail->SourcePortId, ovsFwdCtx->curNbl, 1,
548 &filterReason);
549 ovsFwdCtx->curNbl = NULL;
550 } else {
551 /* If there is no completionList, we assume this is ovs created NBL */
552 ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext,
553 ovsFwdCtx->curNbl, TRUE);
554 ASSERT(ovsFwdCtx->curNbl == NULL);
555 }
556 /* XXX: these can be made debug only to save cycles. Ideally the pipeline
557 * using these fields should reset the values at the end of the pipeline. */
558 ovsFwdCtx->destPortsSizeOut = 0;
559 ovsFwdCtx->tunnelTxNic = NULL;
560 ovsFwdCtx->tunnelRxNic = NULL;
561}
562
563/*
564 * --------------------------------------------------------------------------
565 * OvsDoFlowLookupOutput --
566 * Function to be used for the second stage of a tunneling workflow, ie.:
567 * - On the encapsulated packet on Tx path, to do a flow extract, flow
568 * lookup and excuting the actions.
569 * - On the decapsulated packet on Rx path, to do a flow extract, flow
570 * lookup and excuting the actions.
571 *
572 * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
573 * until the new buffer management framework is adopted.
574 *
575 * Side effects:
576 * The NBL in 'ovsFwdCtx' is consumed.
577 * --------------------------------------------------------------------------
578 */
579static __inline NDIS_STATUS
580OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
581{
ee25964a
SV
582 OvsFlowKey key = { 0 };
583 OvsFlow *flow = NULL;
584 UINT64 hash = 0;
585 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
c803536e
SS
586 POVS_VPORT_ENTRY vport =
587 OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
588 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
c803536e
SS
589 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
590 L"OVS-Dropped due to internal/tunnel port removal");
591 ovsActionStats.noVport++;
592 return NDIS_STATUS_SUCCESS;
593 }
594 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
595
596 /* Assert that in the Rx direction, key is always setup. */
597 ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
4c470e88
SV
598 status =
599 OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
600 &key, &ovsFwdCtx->layers,
601 ovsFwdCtx->tunKey.dst != 0 ? &ovsFwdCtx->tunKey : NULL);
c803536e
SS
602 if (status != NDIS_STATUS_SUCCESS) {
603 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
604 L"OVS-Flow extract failed");
605 ovsActionStats.failedFlowExtract++;
606 return status;
607 }
608
609 flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
610 if (flow) {
611 OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
612 ovsFwdCtx->switchContext->datapath.hits++;
ee25964a
SV
613 status = OvsDoExecuteActions(ovsFwdCtx->switchContext,
614 ovsFwdCtx->completionList,
615 ovsFwdCtx->curNbl,
616 ovsFwdCtx->srcVportNo,
617 ovsFwdCtx->sendFlags,
618 &key, &hash, &ovsFwdCtx->layers,
619 flow->actions, flow->actionsLen);
c803536e
SS
620 ovsFwdCtx->curNbl = NULL;
621 } else {
622 LIST_ENTRY missedPackets;
623 UINT32 num = 0;
624 ovsFwdCtx->switchContext->datapath.misses++;
625 InitializeListHead(&missedPackets);
4c470e88 626 status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS, vport,
640ebde7 627 &key,ovsFwdCtx->curNbl,
a422ea1d 628 FALSE, &ovsFwdCtx->layers,
640ebde7 629 ovsFwdCtx->switchContext, &missedPackets, &num);
c803536e 630 if (num) {
4a3c9b70 631 OvsQueuePackets(&missedPackets, num);
c803536e
SS
632 }
633 if (status == NDIS_STATUS_SUCCESS) {
634 /* Complete the packet since it was copied to user buffer. */
635 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
636 L"OVS-Dropped since packet was copied to userspace");
637 ovsActionStats.flowMiss++;
638 status = NDIS_STATUS_SUCCESS;
639 } else {
640 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
641 L"OVS-Dropped due to failure to queue to userspace");
642 status = NDIS_STATUS_FAILURE;
643 ovsActionStats.failedFlowMiss++;
644 }
645 }
646
647 return status;
648}
649
650/*
651 * --------------------------------------------------------------------------
652 * OvsTunnelPortTx --
653 * The start function for Tx tunneling - encapsulates the packet, and
654 * outputs the packet on the PIF bridge.
655 *
656 * Side effects:
657 * The NBL in 'ovsFwdCtx' is consumed.
658 * --------------------------------------------------------------------------
659 */
660static __inline NDIS_STATUS
661OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
662{
663 NDIS_STATUS status = NDIS_STATUS_FAILURE;
664 PNET_BUFFER_LIST newNbl = NULL;
665
666 /*
667 * Setup the source port to be the internal port to as to facilitate the
668 * second OvsLookupFlow.
669 */
022c2040
EE
670 if (ovsFwdCtx->switchContext->internalVport == NULL ||
671 ovsFwdCtx->switchContext->virtualExternalVport == NULL) {
ad0d70d2
EE
672 OvsClearTunTxCtx(ovsFwdCtx);
673 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
022c2040 674 L"OVS-Dropped since either internal or external port is absent");
ad0d70d2
EE
675 return NDIS_STATUS_FAILURE;
676 }
c803536e
SS
677 ovsFwdCtx->srcVportNo =
678 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo;
679
680 ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId;
681 ovsFwdCtx->fwdDetail->SourceNicIndex =
682 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex;
683
684 /* Do the encap. Encap function does not consume the NBL. */
685 switch(ovsFwdCtx->tunnelTxNic->ovsType) {
85571a3d
AS
686 case OVS_VPORT_TYPE_GRE:
687 status = OvsEncapGre(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
688 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
689 &ovsFwdCtx->layers, &newNbl);
690 break;
e00afcf6 691 case OVS_VPORT_TYPE_VXLAN:
0b623ad5
NR
692 status = OvsEncapVxlan(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
693 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
c803536e
SS
694 &ovsFwdCtx->layers, &newNbl);
695 break;
022c2040
EE
696 case OVS_VPORT_TYPE_STT:
697 status = OvsEncapStt(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
0b623ad5 698 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
022c2040
EE
699 &ovsFwdCtx->layers, &newNbl);
700 break;
47c3123d
YL
701 case OVS_VPORT_TYPE_GENEVE:
702 status = OvsEncapGeneve(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
703 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
704 &ovsFwdCtx->layers, &newNbl);
705 break;
c803536e
SS
706 default:
707 ASSERT(! "Tx: Unhandled tunnel type");
708 }
709
710 /* Reset the tunnel context so that it doesn't get used after this point. */
711 OvsClearTunTxCtx(ovsFwdCtx);
712
713 if (status == NDIS_STATUS_SUCCESS) {
714 ASSERT(newNbl);
715 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
716 L"Complete after cloning NBL for encapsulation");
717 ovsFwdCtx->curNbl = newNbl;
718 status = OvsDoFlowLookupOutput(ovsFwdCtx);
719 ASSERT(ovsFwdCtx->curNbl == NULL);
720 } else {
721 /*
722 * XXX: Temporary freeing of the packet until we register a
723 * callback to IP helper.
724 */
725 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
726 L"OVS-Dropped due to encap failure");
727 ovsActionStats.failedEncap++;
728 status = NDIS_STATUS_SUCCESS;
729 }
730
731 return status;
732}
733
734/*
735 * --------------------------------------------------------------------------
736 * OvsTunnelPortRx --
737 * Decapsulate the incoming NBL based on the tunnel type and goes through
738 * the flow lookup for the inner packet.
739 *
740 * Note: IP checksum is validate here, but L4 checksum validation needs
741 * to be done by the corresponding tunnel types.
742 *
743 * Side effects:
744 * The NBL in 'ovsFwdCtx' is consumed.
745 * --------------------------------------------------------------------------
746 */
747static __inline NDIS_STATUS
748OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
749{
750 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
751 PNET_BUFFER_LIST newNbl = NULL;
752 POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
a422ea1d 753 PCWSTR dropReason = L"OVS-dropped due to new decap packet";
c803536e
SS
754
755 if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
756 != NDIS_STATUS_SUCCESS) {
757 ovsActionStats.failedChecksum++;
758 OVS_LOG_INFO("Packet dropped due to IP checksum failure.");
759 goto dropNbl;
760 }
761
022c2040
EE
762 /*
763 * Decap port functions should return a new NBL if it was copied, and
764 * this new NBL should be setup as the ovsFwdCtx->curNbl.
765 */
766
c803536e 767 switch(tunnelRxVport->ovsType) {
85571a3d
AS
768 case OVS_VPORT_TYPE_GRE:
769 status = OvsDecapGre(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
770 &ovsFwdCtx->tunKey, &newNbl);
771 break;
e00afcf6 772 case OVS_VPORT_TYPE_VXLAN:
022c2040
EE
773 status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
774 &ovsFwdCtx->tunKey, &newNbl);
775 break;
776 case OVS_VPORT_TYPE_STT:
777 status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
778 &ovsFwdCtx->tunKey, &newNbl);
a422ea1d
SV
779 if (status == NDIS_STATUS_SUCCESS && newNbl == NULL) {
780 /* This was an STT-LSO Fragment */
781 dropReason = L"OVS-STT segment is cached";
782 }
c803536e 783 break;
47c3123d
YL
784 case OVS_VPORT_TYPE_GENEVE:
785 status = OvsDecapGeneve(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
786 &ovsFwdCtx->tunKey, &newNbl);
787 break;
c803536e
SS
788 default:
789 OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
790 tunnelRxVport->ovsType);
791 ASSERT(! "Rx: Unhandled tunnel type");
792 status = NDIS_STATUS_NOT_SUPPORTED;
793 }
794
795 if (status != NDIS_STATUS_SUCCESS) {
796 ovsActionStats.failedDecap++;
797 goto dropNbl;
798 }
799
800 /*
801 * tunnelRxNic and other fields will be cleared, re-init the context
802 * before usage.
803 */
a422ea1d 804 OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
c803536e 805
a422ea1d
SV
806 if (newNbl) {
807 /* Decapsulated packet is in a new NBL */
808 ovsFwdCtx->tunnelRxNic = tunnelRxVport;
809 OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
810 newNbl, tunnelRxVport->portNo, 0,
811 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
812 ovsFwdCtx->completionList,
813 &ovsFwdCtx->layers, FALSE);
c803536e 814
a422ea1d
SV
815 /*
816 * Set the NBL's SourcePortId and SourceNicIndex to default values to
817 * keep NDIS happy when we forward the packet.
818 */
819 ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
820 ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
c803536e 821
a422ea1d
SV
822 status = OvsDoFlowLookupOutput(ovsFwdCtx);
823 }
c803536e
SS
824 ASSERT(ovsFwdCtx->curNbl == NULL);
825 OvsClearTunRxCtx(ovsFwdCtx);
826
827 return status;
828
829dropNbl:
830 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
831 L"OVS-dropped due to decap failure");
832 OvsClearTunRxCtx(ovsFwdCtx);
833 return status;
834}
835
836
837/*
838 * --------------------------------------------------------------------------
839 * OvsOutputForwardingCtx --
840 * This function outputs an NBL to NDIS or to a tunneling pipeline based on
841 * the ports added so far into 'ovsFwdCtx'.
842 *
843 * Side effects:
844 * This function consumes the NBL - either by forwarding it successfully to
845 * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it.
846 *
847 * Also makes sure that the list of destination ports - tunnel or otherwise is
848 * drained.
849 * --------------------------------------------------------------------------
850 */
851static __inline NDIS_STATUS
852OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx)
853{
854 NDIS_STATUS status = STATUS_SUCCESS;
855 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
eda457f1 856 PCWSTR dropReason;
c803536e
SS
857
858 /*
859 * Handle the case where the some of the destination ports are tunneled
860 * ports - the non-tunneled ports get a unmodified copy of the NBL, and the
861 * tunneling pipeline starts when we output the packet to tunneled port.
862 */
863 if (ovsFwdCtx->destPortsSizeOut > 0) {
864 PNET_BUFFER_LIST newNbl = NULL;
865 PNET_BUFFER nb;
866 UINT32 portsToUpdate =
867 ovsFwdCtx->fwdDetail->NumAvailableDestinations -
868 (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut);
869
870 ASSERT(ovsFwdCtx->destinationPorts != NULL);
871
872 /*
873 * Create a copy of the packet in order to do encap on it later. Also,
874 * don't copy the offload context since the encap'd packet has a
875 * different set of headers. This will change when we implement offloads
876 * before doing encapsulation.
877 */
878 if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) {
879 nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
880 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
881 0, 0, TRUE /*copy NBL info*/);
882 if (newNbl == NULL) {
883 status = NDIS_STATUS_RESOURCES;
884 ovsActionStats.noCopiedNbl++;
eda457f1 885 dropReason = L"Dropped due to failure to create NBL copy.";
c803536e
SS
886 goto dropit;
887 }
888 }
889
890 /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */
891 ASSERT(portsToUpdate > 0);
892 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
893 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
894 portsToUpdate, ovsFwdCtx->destinationPorts);
895 if (status != NDIS_STATUS_SUCCESS) {
896 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
897 ovsActionStats.cannotGrowDest++;
eda457f1 898 dropReason = L"Dropped due to failure to update destinations.";
c803536e
SS
899 goto dropit;
900 }
901
902 OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
903 ovsFwdCtx->sendFlags);
904 /* End this pipeline by resetting the corresponding context. */
905 ovsFwdCtx->destPortsSizeOut = 0;
906 ovsFwdCtx->curNbl = NULL;
907 if (newNbl) {
908 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
909 newNbl, ovsFwdCtx->srcVportNo, 0,
910 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
911 ovsFwdCtx->completionList,
912 &ovsFwdCtx->layers, FALSE);
913 if (status != NDIS_STATUS_SUCCESS) {
eda457f1 914 dropReason = L"Dropped due to resouces.";
c803536e
SS
915 goto dropit;
916 }
917 }
918 }
919
920 if (ovsFwdCtx->tunnelTxNic != NULL) {
921 status = OvsTunnelPortTx(ovsFwdCtx);
922 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
923 ASSERT(ovsFwdCtx->tunKey.dst == 0);
924 } else if (ovsFwdCtx->tunnelRxNic != NULL) {
925 status = OvsTunnelPortRx(ovsFwdCtx);
926 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
927 ASSERT(ovsFwdCtx->tunKey.dst == 0);
928 }
929 ASSERT(ovsFwdCtx->curNbl == NULL);
930
931 return status;
932
933dropit:
934 if (status != NDIS_STATUS_SUCCESS) {
eda457f1 935 OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
c803536e
SS
936 }
937
938 return status;
939}
940
941
942/*
943 * --------------------------------------------------------------------------
944 * OvsLookupFlowOutput --
945 * Utility function for external callers to do flow extract, lookup,
946 * actions execute on a given NBL.
947 *
948 * Note: If this is being used from a callback function, make sure that the
949 * arguments specified are still valid in the asynchronous context.
950 *
951 * Side effects:
952 * This function consumes the NBL.
953 * --------------------------------------------------------------------------
954 */
955VOID
956OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext,
957 VOID *compList,
958 PNET_BUFFER_LIST curNbl)
959{
960 NDIS_STATUS status;
961 OvsForwardingContext ovsFwdCtx;
962 POVS_VPORT_ENTRY internalVport =
963 (POVS_VPORT_ENTRY)switchContext->internalVport;
964
965 /* XXX: make sure comp list was not a stack variable previously. */
966 OvsCompletionList *completionList = (OvsCompletionList *)compList;
967
968 /*
969 * XXX: can internal port disappear while we are busy doing ARP resolution?
970 * It could, but will we get this callback from IP helper in that case. Need
971 * to check.
972 */
973 ASSERT(switchContext->internalVport);
974 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl,
975 internalVport->portNo, 0,
976 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl),
977 completionList, NULL, TRUE);
978 if (status != NDIS_STATUS_SUCCESS) {
979 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
980 L"OVS-Dropped due to resources");
981 return;
982 }
983
984 ASSERT(FALSE);
985 /*
986 * XXX: We need to acquire the dispatch lock and the datapath lock.
987 */
988
989 OvsDoFlowLookupOutput(&ovsFwdCtx);
990}
991
992
993/*
994 * --------------------------------------------------------------------------
995 * OvsOutputBeforeSetAction --
996 * Function to be called to complete one set of actions on an NBL, before
997 * we start the next one.
998 * --------------------------------------------------------------------------
999 */
1000static __inline NDIS_STATUS
1001OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx)
1002{
1003 PNET_BUFFER_LIST newNbl;
5278f698 1004 NDIS_STATUS status;
c803536e
SS
1005
1006 /*
1007 * Create a copy and work on the copy after this point. The original NBL is
1008 * forwarded. One reason to not use the copy for forwarding is that
1009 * ports have already been added to the original NBL, and it might be
1010 * inefficient/impossible to remove/re-add them to the copy. There's no
1011 * notion of removing the ports, the ports need to be marked as
1012 * "isExcluded". There's seems no real advantage to retaining the original
1013 * and sending out the copy instead.
1014 *
1015 * XXX: We are copying the offload context here. This is to handle actions
1016 * such as:
1017 * outport, pop_vlan(), outport, push_vlan(), outport
1018 *
1019 * copy size needs to include inner ether + IP + TCP, need to revisit
1020 * if we support IP options.
1021 * XXX Head room needs to include the additional encap.
1022 * XXX copySize check is not considering multiple NBs.
1023 */
c803536e
SS
1024 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1025 0, 0, TRUE /*copy NBL info*/);
1026
1027 ASSERT(ovsFwdCtx->destPortsSizeOut > 0 ||
1028 ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL);
1029
ba472491
AS
1030 /* Send the original packet out and save the original source port number */
1031 UINT32 tempVportNo = ovsFwdCtx->srcVportNo;
c803536e
SS
1032 status = OvsOutputForwardingCtx(ovsFwdCtx);
1033 ASSERT(ovsFwdCtx->curNbl == NULL);
1034 ASSERT(ovsFwdCtx->destPortsSizeOut == 0);
1035 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
1036 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
1037
1038 /* If we didn't make a copy, can't continue. */
1039 if (newNbl == NULL) {
1040 ovsActionStats.noCopiedNbl++;
1041 return NDIS_STATUS_RESOURCES;
1042 }
1043
1044 /* Finish the remaining actions with the new NBL */
1045 if (status != NDIS_STATUS_SUCCESS) {
1046 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
1047 } else {
1048 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
ba472491 1049 newNbl, tempVportNo, 0,
c803536e
SS
1050 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1051 ovsFwdCtx->completionList,
1052 &ovsFwdCtx->layers, FALSE);
1053 }
1054
1055 return status;
1056}
1057
1058
1059/*
1060 * --------------------------------------------------------------------------
5874d571
SV
1061 * OvsPopFieldInPacketBuf --
1062 * Function to pop a specified field of length 'shiftLength' located at
1063 * 'shiftOffset' from the ethernet header. The data on the left of the
1064 * 'shiftOffset' is right shifted.
1065 *
1066 * Returns a pointer to the new start in 'bufferData'.
c803536e
SS
1067 * --------------------------------------------------------------------------
1068 */
1069static __inline NDIS_STATUS
5874d571
SV
1070OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx,
1071 UINT32 shiftOffset,
1072 UINT32 shiftLength,
1073 PUINT8 *bufferData)
c803536e
SS
1074{
1075 PNET_BUFFER curNb;
1076 PMDL curMdl;
1077 PUINT8 bufferStart;
c803536e
SS
1078 UINT32 packetLen, mdlLen;
1079 PNET_BUFFER_LIST newNbl;
1080 NDIS_STATUS status;
5874d571 1081 PUINT8 tempBuffer[ETH_HEADER_LENGTH];
c803536e 1082
5874d571 1083 ASSERT(shiftOffset > ETH_ADDR_LENGTH);
c803536e
SS
1084
1085 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1086 0, 0, TRUE /* copy NBL info */);
1087 if (!newNbl) {
1088 ovsActionStats.noCopiedNbl++;
1089 return NDIS_STATUS_RESOURCES;
1090 }
1091
1092 /* Complete the original NBL and create a copy to modify. */
1093 OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy");
1094
5874d571
SV
1095 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, newNbl,
1096 ovsFwdCtx->srcVportNo, 0,
c803536e
SS
1097 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1098 NULL, &ovsFwdCtx->layers, FALSE);
1099 if (status != NDIS_STATUS_SUCCESS) {
1100 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1101 L"Dropped due to resouces");
1102 return NDIS_STATUS_RESOURCES;
1103 }
1104
1105 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1106 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1107 ASSERT(curNb->Next == NULL);
1108 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1109 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1110 if (!bufferStart) {
1111 return NDIS_STATUS_RESOURCES;
1112 }
1113 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
5874d571
SV
1114 /* Bail out if L2 + shiftLength is not contiguous in the first buffer. */
1115 if (MIN(packetLen, mdlLen) < sizeof(EthHdr) + shiftLength) {
c803536e
SS
1116 ASSERT(FALSE);
1117 return NDIS_STATUS_FAILURE;
1118 }
1119 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
5874d571
SV
1120 RtlCopyMemory(tempBuffer, bufferStart, shiftOffset);
1121 RtlCopyMemory(bufferStart + shiftLength, tempBuffer, shiftOffset);
c803536e
SS
1122 NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL);
1123
5874d571
SV
1124 if (bufferData) {
1125 *bufferData = bufferStart + shiftLength;
1126 }
1127
1128 return NDIS_STATUS_SUCCESS;
1129}
1130
1131
1132/*
1133 * --------------------------------------------------------------------------
1134 * OvsPopVlanInPktBuf --
1135 * Function to pop a VLAN tag when the tag is in the packet buffer.
1136 * --------------------------------------------------------------------------
1137 */
1138static __inline NDIS_STATUS
1139OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
1140{
1141 /*
1142 * Declare a dummy vlanTag structure since we need to compute the size
1143 * of shiftLength. The NDIS one is a unionized structure.
1144 */
1145 NDIS_PACKET_8021Q_INFO vlanTag = {0};
1146 UINT32 shiftLength = sizeof(vlanTag.TagHeader);
1147 UINT32 shiftOffset = sizeof(DL_EUI48) + sizeof(DL_EUI48);
1148
1149 return OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength, NULL);
1150}
1151
1152
1153/*
1154 * --------------------------------------------------------------------------
1155 * OvsActionMplsPop --
1156 * Function to pop the first MPLS label from the current packet.
1157 * --------------------------------------------------------------------------
1158 */
1159static __inline NDIS_STATUS
1160OvsActionMplsPop(OvsForwardingContext *ovsFwdCtx,
1161 ovs_be16 ethertype)
1162{
5278f698 1163 NDIS_STATUS status;
5874d571
SV
1164 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1165 EthHdr *ethHdr = NULL;
1166
1167 status = OvsPopFieldInPacketBuf(ovsFwdCtx, sizeof(*ethHdr),
1168 MPLS_HLEN, (PUINT8*)&ethHdr);
1169 if (status == NDIS_STATUS_SUCCESS) {
1170 if (ethHdr && OvsEthertypeIsMpls(ethHdr->Type)) {
1171 ethHdr->Type = ethertype;
1172 }
1173
1174 layers->l3Offset -= MPLS_HLEN;
1175 layers->l4Offset -= MPLS_HLEN;
1176 }
1177
1178 return status;
1179}
1180
1181
1182/*
1183 * --------------------------------------------------------------------------
1184 * OvsActionMplsPush --
1185 * Function to push the MPLS label into the current packet.
1186 * --------------------------------------------------------------------------
1187 */
1188static __inline NDIS_STATUS
1189OvsActionMplsPush(OvsForwardingContext *ovsFwdCtx,
1190 const struct ovs_action_push_mpls *mpls)
1191{
1192 NDIS_STATUS status;
1193 PNET_BUFFER curNb = NULL;
1194 PMDL curMdl = NULL;
1195 PUINT8 bufferStart = NULL;
1196 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1197 EthHdr *ethHdr = NULL;
1198 MPLSHdr *mplsHdr = NULL;
1199 UINT32 mdlLen = 0, curMdlOffset = 0;
1200 PNET_BUFFER_LIST newNbl;
1201
1202 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1203 layers->l3Offset, MPLS_HLEN, TRUE);
1204 if (!newNbl) {
1205 ovsActionStats.noCopiedNbl++;
1206 return NDIS_STATUS_RESOURCES;
1207 }
1208 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1209 L"Complete after partial copy.");
1210
1211 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1212 newNbl, ovsFwdCtx->srcVportNo, 0,
1213 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1214 NULL, &ovsFwdCtx->layers, FALSE);
1215 if (status != NDIS_STATUS_SUCCESS) {
1216 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1217 L"OVS-Dropped due to resources");
1218 return NDIS_STATUS_RESOURCES;
1219 }
1220
1221 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1222 ASSERT(curNb->Next == NULL);
1223
1224 status = NdisRetreatNetBufferDataStart(curNb, MPLS_HLEN, 0, NULL);
1225 if (status != NDIS_STATUS_SUCCESS) {
1226 return status;
1227 }
1228
1229 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1230 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1231 if (!curMdl) {
1232 ovsActionStats.noResource++;
1233 return NDIS_STATUS_RESOURCES;
1234 }
1235
1236 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1237 mdlLen -= curMdlOffset;
1238 ASSERT(mdlLen >= MPLS_HLEN);
1239
1240 ethHdr = (EthHdr *)(bufferStart + curMdlOffset);
1241 RtlMoveMemory(ethHdr, (UINT8*)ethHdr + MPLS_HLEN, sizeof(*ethHdr));
1242 ethHdr->Type = mpls->mpls_ethertype;
1243
1244 mplsHdr = (MPLSHdr *)(ethHdr + 1);
1245 mplsHdr->lse = mpls->mpls_lse;
1246
1247 layers->l3Offset += MPLS_HLEN;
1248 layers->l4Offset += MPLS_HLEN;
1249
c803536e
SS
1250 return NDIS_STATUS_SUCCESS;
1251}
1252
c803536e
SS
1253/*
1254 *----------------------------------------------------------------------------
1255 * OvsUpdateEthHeader --
1256 * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the
1257 * specified key.
1258 *----------------------------------------------------------------------------
1259 */
1260static __inline NDIS_STATUS
1261OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx,
1262 const struct ovs_key_ethernet *ethAttr)
1263{
1264 PNET_BUFFER curNb;
1265 PMDL curMdl;
1266 PUINT8 bufferStart;
1267 EthHdr *ethHdr;
1268 UINT32 packetLen, mdlLen;
1269
1270 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1271 ASSERT(curNb->Next == NULL);
1272 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1273 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1274 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1275 if (!bufferStart) {
1276 ovsActionStats.noResource++;
1277 return NDIS_STATUS_RESOURCES;
1278 }
1279 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1280 ASSERT(mdlLen > 0);
1281 /* Bail out if the L2 header is not in a contiguous buffer. */
1282 if (MIN(packetLen, mdlLen) < sizeof *ethHdr) {
1283 ASSERT(FALSE);
1284 return NDIS_STATUS_FAILURE;
1285 }
1286 ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb));
1287
1288 RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst,
1289 sizeof ethHdr->Destination);
1290 RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source);
1291
1292 return NDIS_STATUS_SUCCESS;
1293}
1294
1295/*
1296 *----------------------------------------------------------------------------
1297 * OvsUpdateIPv4Header --
1298 * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
1299 * specified key.
1300 *----------------------------------------------------------------------------
1301 */
1302static __inline NDIS_STATUS
1303OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
1304 const struct ovs_key_ipv4 *ipAttr)
1305{
1306 PNET_BUFFER curNb;
1307 PMDL curMdl;
1308 ULONG curMdlOffset;
1309 PUINT8 bufferStart;
1310 UINT32 mdlLen, hdrSize, packetLen;
1311 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1312 NDIS_STATUS status;
1313 IPHdr *ipHdr;
1314 TCPHdr *tcpHdr = NULL;
1315 UDPHdr *udpHdr = NULL;
1316
1317 ASSERT(layers->value != 0);
1318
1319 /*
1320 * Peek into the MDL to get a handle to the IP header and if required
1321 * the TCP/UDP header as well. We check if the required headers are in one
1322 * contiguous MDL, and if not, we copy them over to one MDL.
1323 */
1324 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1325 ASSERT(curNb->Next == NULL);
1326 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1327 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1328 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1329 if (!bufferStart) {
1330 ovsActionStats.noResource++;
1331 return NDIS_STATUS_RESOURCES;
1332 }
1333 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1334 mdlLen -= curMdlOffset;
1335 ASSERT((INT)mdlLen >= 0);
1336
1337 if (layers->isTcp || layers->isUdp) {
1338 hdrSize = layers->l4Offset +
1339 layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
1340 } else {
1341 hdrSize = layers->l3Offset + sizeof (*ipHdr);
1342 }
1343
1344 /* Count of number of bytes of valid data there are in the first MDL. */
1345 mdlLen = MIN(packetLen, mdlLen);
1346 if (mdlLen < hdrSize) {
1347 PNET_BUFFER_LIST newNbl;
1348 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1349 hdrSize, 0, TRUE /*copy NBL info*/);
1350 if (!newNbl) {
1351 ovsActionStats.noCopiedNbl++;
1352 return NDIS_STATUS_RESOURCES;
1353 }
1354 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1355 L"Complete after partial copy.");
1356
1357 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1358 newNbl, ovsFwdCtx->srcVportNo, 0,
1359 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1360 NULL, &ovsFwdCtx->layers, FALSE);
1361 if (status != NDIS_STATUS_SUCCESS) {
1362 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1363 L"OVS-Dropped due to resources");
1364 return NDIS_STATUS_RESOURCES;
1365 }
1366
1367 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1368 ASSERT(curNb->Next == NULL);
1369 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1370 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1371 if (!curMdl) {
1372 ovsActionStats.noResource++;
1373 return NDIS_STATUS_RESOURCES;
1374 }
1375 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1376 mdlLen -= curMdlOffset;
1377 ASSERT(mdlLen >= hdrSize);
1378 }
1379
1380 ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset);
1381
1382 if (layers->isTcp) {
1383 tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1384 } else if (layers->isUdp) {
1385 udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1386 }
1387
1388 /*
1389 * Adjust the IP header inline as dictated by the action, nad also update
1390 * the IP and the TCP checksum for the data modified.
1391 *
1392 * In the future, this could be optimized to make one call to
1393 * ChecksumUpdate32(). Ignoring this for now, since for the most common
1394 * case, we only update the TTL.
1395 */
1396 if (ipHdr->saddr != ipAttr->ipv4_src) {
1397 if (tcpHdr) {
1398 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr,
1399 ipAttr->ipv4_src);
1400 } else if (udpHdr && udpHdr->check) {
1401 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr,
1402 ipAttr->ipv4_src);
1403 }
1404
1405 if (ipHdr->check != 0) {
1406 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr,
1407 ipAttr->ipv4_src);
1408 }
1409 ipHdr->saddr = ipAttr->ipv4_src;
1410 }
1411 if (ipHdr->daddr != ipAttr->ipv4_dst) {
1412 if (tcpHdr) {
1413 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr,
1414 ipAttr->ipv4_dst);
1415 } else if (udpHdr && udpHdr->check) {
1416 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr,
1417 ipAttr->ipv4_dst);
1418 }
1419
1420 if (ipHdr->check != 0) {
1421 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr,
1422 ipAttr->ipv4_dst);
1423 }
1424 ipHdr->daddr = ipAttr->ipv4_dst;
1425 }
1426 if (ipHdr->protocol != ipAttr->ipv4_proto) {
1427 UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00;
1428 UINT16 newProto = (ipAttr->ipv4_proto << 16) & 0xff00;
1429 if (tcpHdr) {
1430 tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto);
1431 } else if (udpHdr && udpHdr->check) {
1432 udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto);
1433 }
1434
1435 if (ipHdr->check != 0) {
1436 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto);
1437 }
1438 ipHdr->protocol = ipAttr->ipv4_proto;
1439 }
1440 if (ipHdr->ttl != ipAttr->ipv4_ttl) {
1441 UINT16 oldTtl = (ipHdr->ttl) & 0xff;
1442 UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff;
1443 if (ipHdr->check != 0) {
1444 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl);
1445 }
1446 ipHdr->ttl = ipAttr->ipv4_ttl;
1447 }
1448
1449 return NDIS_STATUS_SUCCESS;
1450}
1451
1452/*
1453 * --------------------------------------------------------------------------
1454 * OvsExecuteSetAction --
1455 * Executes a set() action, but storing the actions into 'ovsFwdCtx'
1456 * --------------------------------------------------------------------------
1457 */
1458static __inline NDIS_STATUS
1459OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx,
1460 OvsFlowKey *key,
1461 UINT64 *hash,
d838e577 1462 const PNL_ATTR a)
c803536e 1463{
d838e577 1464 enum ovs_key_attr type = NlAttrType(a);
c803536e
SS
1465 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1466
1467 switch (type) {
1468 case OVS_KEY_ATTR_ETHERNET:
1469 status = OvsUpdateEthHeader(ovsFwdCtx,
d838e577 1470 NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet)));
c803536e
SS
1471 break;
1472
1473 case OVS_KEY_ATTR_IPV4:
1474 status = OvsUpdateIPv4Header(ovsFwdCtx,
d838e577 1475 NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4)));
c803536e
SS
1476 break;
1477
1478 case OVS_KEY_ATTR_TUNNEL:
1479 {
1480 OvsIPv4TunnelKey tunKey;
47c3123d
YL
1481 NTSTATUS convertStatus = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey);
1482 status = SUCCEEDED(convertStatus) ? NDIS_STATUS_SUCCESS : NDIS_STATUS_FAILURE;
c803536e
SS
1483 ASSERT(status == NDIS_STATUS_SUCCESS);
1484 tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key));
ffde5f8f 1485 tunKey.dst_port = key->ipKey.l4.tpDst;
c803536e 1486 RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey);
c803536e
SS
1487 break;
1488 }
4ac06450 1489
c803536e 1490 default:
4ac06450 1491 OVS_LOG_INFO("Unhandled attribute %#x", type);
3819692e 1492 break;
c803536e
SS
1493 }
1494 return status;
1495}
1496
1497/*
1498 * --------------------------------------------------------------------------
ee25964a
SV
1499 * OvsExecuteRecirc --
1500 * The function adds a deferred action to allow the current packet, nbl,
1501 * to re-enter datapath packet processing.
1502 * --------------------------------------------------------------------------
1503 */
1504NDIS_STATUS
1505OvsExecuteRecirc(OvsForwardingContext *ovsFwdCtx,
1506 OvsFlowKey *key,
1507 const PNL_ATTR actions,
1508 int rem)
1509{
1510 POVS_DEFERRED_ACTION deferredAction = NULL;
1511 PNET_BUFFER_LIST newNbl = NULL;
1512
1513 if (!NlAttrIsLast(actions, rem)) {
1514 /*
1515 * Recirc action is the not the last action of the action list, so we
1516 * need to clone the packet.
1517 */
1518 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1519 0, 0, TRUE /*copy NBL info*/);
1520 /*
1521 * Skip the recirc action when out of memory, but continue on with the
1522 * rest of the action list.
1523 */
1524 if (newNbl == NULL) {
1525 ovsActionStats.noCopiedNbl++;
1526 return NDIS_STATUS_SUCCESS;
1527 }
ee25964a
SV
1528 }
1529
ac933282
SV
1530 if (newNbl) {
1531 deferredAction = OvsAddDeferredActions(newNbl, key, NULL);
1532 } else {
1533 deferredAction = OvsAddDeferredActions(ovsFwdCtx->curNbl, key, NULL);
1534 }
1535
ee25964a
SV
1536 if (deferredAction) {
1537 deferredAction->key.recircId = NlAttrGetU32(actions);
1538 } else {
1539 if (newNbl) {
1540 ovsActionStats.deferredActionsQueueFull++;
1541 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
1542 }
1543 }
1544
1545 return NDIS_STATUS_SUCCESS;
1546}
1547
245eedef
SV
1548/*
1549 * --------------------------------------------------------------------------
1550 * OvsExecuteHash --
1551 * The function updates datapath hash read from userspace.
1552 * --------------------------------------------------------------------------
1553 */
1554VOID
1555OvsExecuteHash(OvsFlowKey *key,
1556 const PNL_ATTR attr)
1557{
1558 struct ovs_action_hash *hash_act = NlAttrData(attr);
1559 UINT32 hash = 0;
1560
1561 hash = (UINT32)OvsHashFlow(key);
1562 hash = OvsJhashWords(&hash, 1, hash_act->hash_basis);
1563 if (!hash)
1564 hash = 1;
1565
1566 key->dpHash = hash;
1567}
1568
9d36ca82
SV
1569/*
1570 * --------------------------------------------------------------------------
1571 * OvsOutputUserspaceAction --
1572 * This function sends the packet to userspace according to nested
1573 * %OVS_USERSPACE_ATTR_* attributes.
1574 * --------------------------------------------------------------------------
1575 */
1576static __inline NDIS_STATUS
1577OvsOutputUserspaceAction(OvsForwardingContext *ovsFwdCtx,
1578 OvsFlowKey *key,
1579 const PNL_ATTR attr)
1580{
1581 NTSTATUS status = NDIS_STATUS_SUCCESS;
1582 PNL_ATTR userdataAttr;
1583 PNL_ATTR queueAttr;
1584 POVS_PACKET_QUEUE_ELEM elem;
1585 POVS_PACKET_HDR_INFO layers = &ovsFwdCtx->layers;
1586 BOOLEAN isRecv = FALSE;
1587
1588 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext,
1589 ovsFwdCtx->srcVportNo);
1590
1591 if (vport) {
1592 if (vport->isExternal ||
1593 OvsIsTunnelVportType(vport->ovsType)) {
1594 isRecv = TRUE;
1595 }
1596 }
1597
1598 queueAttr = NlAttrFindNested(attr, OVS_USERSPACE_ATTR_PID);
1599 userdataAttr = NlAttrFindNested(attr, OVS_USERSPACE_ATTR_USERDATA);
1600
1601 elem = OvsCreateQueueNlPacket(NlAttrData(userdataAttr),
1602 NlAttrGetSize(userdataAttr),
1603 OVS_PACKET_CMD_ACTION,
1604 vport, key, ovsFwdCtx->curNbl,
1605 NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl),
1606 isRecv,
1607 layers);
1608 if (elem) {
1609 LIST_ENTRY missedPackets;
1610 InitializeListHead(&missedPackets);
1611 InsertTailList(&missedPackets, &elem->link);
1612 OvsQueuePackets(&missedPackets, 1);
1613 } else {
1614 status = NDIS_STATUS_FAILURE;
1615 }
1616
1617 return status;
1618}
1619
1620/*
1621 * --------------------------------------------------------------------------
1622 * OvsExecuteSampleAction --
1623 * Executes actions based on probability, as specified in the nested
1624 * %OVS_SAMPLE_ATTR_* attributes.
1625 * --------------------------------------------------------------------------
1626 */
1627static __inline NDIS_STATUS
1628OvsExecuteSampleAction(OvsForwardingContext *ovsFwdCtx,
1629 OvsFlowKey *key,
1630 const PNL_ATTR attr)
1631{
1632 PNET_BUFFER_LIST newNbl = NULL;
1633 PNL_ATTR actionsList = NULL;
1634 PNL_ATTR a = NULL;
1635 INT rem = 0;
1636
1637 SRand();
1638 NL_ATTR_FOR_EACH_UNSAFE(a, rem, NlAttrData(attr), NlAttrGetSize(attr)) {
1639 switch (NlAttrType(a)) {
1640 case OVS_SAMPLE_ATTR_PROBABILITY:
1641 {
1642 UINT32 probability = NlAttrGetU32(a);
1643
1644 if (!probability || Rand() > probability) {
1645 return 0;
1646 }
1647 break;
1648 }
1649 case OVS_SAMPLE_ATTR_ACTIONS:
1650 actionsList = a;
1651 break;
1652 }
1653 }
1654
1655 if (actionsList) {
1656 rem = NlAttrGetSize(actionsList);
1657 a = (PNL_ATTR)NlAttrData(actionsList);
1658 }
1659
1660 if (!rem) {
1661 /* Actions list is empty, do nothing */
1662 return STATUS_SUCCESS;
1663 }
1664
1665 /*
1666 * The only known usage of sample action is having a single user-space
1667 * action. Treat this usage as a special case.
1668 */
1669 if (NlAttrType(a) == OVS_ACTION_ATTR_USERSPACE &&
1670 NlAttrIsLast(a, rem)) {
1671 return OvsOutputUserspaceAction(ovsFwdCtx, key, a);
1672 }
1673
1674 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1675 0, 0, TRUE /*copy NBL info*/);
1676 if (newNbl == NULL) {
1677 /*
1678 * Skip the sample action when out of memory, but continue on with the
1679 * rest of the action list.
1680 */
1681 ovsActionStats.noCopiedNbl++;
1682 return STATUS_SUCCESS;
1683 }
1684
1685 if (!OvsAddDeferredActions(newNbl, key, a)) {
1686 OVS_LOG_INFO(
1687 "Deferred actions limit reached, dropping sample action.");
1688 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
1689 }
1690
1691 return STATUS_SUCCESS;
1692}
1693
ee25964a
SV
1694/*
1695 * --------------------------------------------------------------------------
1696 * OvsDoExecuteActions --
1697 * Interpret and execute the specified 'actions' on the specified packet
c803536e
SS
1698 * 'curNbl'. The expectation is that if the packet needs to be dropped
1699 * (completed) for some reason, it is added to 'completionList' so that the
1700 * caller can complete the packet. If 'completionList' is NULL, the NBL is
1701 * assumed to be generated by OVS and freed up. Otherwise, the function
1702 * consumes the NBL by generating a NDIS send indication for the packet.
1703 *
1704 * There are one or more of "clone" NBLs that may get generated while
1705 * executing the actions. Upon any failures, the "cloned" NBLs are freed up,
1706 * and the caller does not have to worry about them.
1707 *
1708 * Success or failure is returned based on whether the specified actions
1709 * were executed successfully on the packet or not.
1710 * --------------------------------------------------------------------------
1711 */
1712NDIS_STATUS
ee25964a
SV
1713OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext,
1714 OvsCompletionList *completionList,
1715 PNET_BUFFER_LIST curNbl,
1716 UINT32 portNo,
1717 ULONG sendFlags,
1718 OvsFlowKey *key,
1719 UINT64 *hash,
1720 OVS_PACKET_HDR_INFO *layers,
1721 const PNL_ATTR actions,
1722 INT actionsLen)
c803536e 1723{
d838e577 1724 PNL_ATTR a;
c803536e
SS
1725 INT rem;
1726 UINT32 dstPortID;
1727 OvsForwardingContext ovsFwdCtx;
1728 PCWSTR dropReason = L"";
1729 NDIS_STATUS status;
1730 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
1731 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
1732
1733 /* XXX: ASSERT that the flow table lock is held. */
1734 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
1735 sendFlags, fwdDetail, completionList,
1736 layers, TRUE);
1737 if (status != NDIS_STATUS_SUCCESS) {
1738 dropReason = L"OVS-initing destination port list failed";
1739 goto dropit;
1740 }
1741
1742 if (actionsLen == 0) {
1743 dropReason = L"OVS-Dropped due to Flow action";
1744 ovsActionStats.zeroActionLen++;
1745 goto dropit;
1746 }
1747
1748 NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) {
d838e577 1749 switch(NlAttrType(a)) {
c803536e 1750 case OVS_ACTION_ATTR_OUTPUT:
d838e577 1751 dstPortID = NlAttrGetU32(a);
c803536e
SS
1752 status = OvsAddPorts(&ovsFwdCtx, key, dstPortID,
1753 TRUE, TRUE);
1754 if (status != NDIS_STATUS_SUCCESS) {
1755 dropReason = L"OVS-adding destination port failed";
1756 goto dropit;
1757 }
1758 break;
1759
1760 case OVS_ACTION_ATTR_PUSH_VLAN:
1761 {
1762 struct ovs_action_push_vlan *vlan;
1763 PVOID vlanTagValue;
1764 PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag;
1765
1766 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1767 || ovsFwdCtx.tunnelRxNic != NULL) {
1768 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1769 if (status != NDIS_STATUS_SUCCESS) {
1770 dropReason = L"OVS-adding destination failed";
1771 goto dropit;
1772 }
1773 }
1774
1775 vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1776 Ieee8021QNetBufferListInfo);
1777 if (vlanTagValue != NULL) {
1778 /*
1779 * XXX: We don't support double VLAN tag offload. In such cases,
1780 * we need to insert the existing one into the packet buffer,
1781 * and add the new one as offload. This will take care of
1782 * guest tag-in-tag case as well as OVS rules that specify
1783 * tag-in-tag.
1784 */
1785 } else {
1786 vlanTagValue = 0;
1787 vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
d838e577 1788 vlan = (struct ovs_action_push_vlan *)NlAttrGet((const PNL_ATTR)a);
c803536e
SS
1789 vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff;
1790 vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13;
1791
1792 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1793 Ieee8021QNetBufferListInfo) = vlanTagValue;
1794 }
1795 break;
1796 }
1797
1798 case OVS_ACTION_ATTR_POP_VLAN:
1799 {
1800 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1801 || ovsFwdCtx.tunnelRxNic != NULL) {
1802 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1803 if (status != NDIS_STATUS_SUCCESS) {
1804 dropReason = L"OVS-adding destination failed";
1805 goto dropit;
1806 }
1807 }
1808
1809 if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1810 Ieee8021QNetBufferListInfo) != 0) {
1811 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1812 Ieee8021QNetBufferListInfo) = 0;
1813 } else {
1814 /*
1815 * The VLAN tag is inserted into the packet buffer. Pop the tag
1816 * by packet buffer modification.
1817 */
1818 status = OvsPopVlanInPktBuf(&ovsFwdCtx);
1819 if (status != NDIS_STATUS_SUCCESS) {
1820 dropReason = L"OVS-pop vlan action failed";
1821 goto dropit;
1822 }
1823 }
1824 break;
1825 }
1826
5874d571
SV
1827 case OVS_ACTION_ATTR_PUSH_MPLS:
1828 {
1829 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1830 || ovsFwdCtx.tunnelRxNic != NULL) {
1831 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1832 if (status != NDIS_STATUS_SUCCESS) {
1833 dropReason = L"OVS-adding destination failed";
1834 goto dropit;
1835 }
1836 }
1837
1838 status = OvsActionMplsPush(&ovsFwdCtx,
1839 (struct ovs_action_push_mpls *)NlAttrGet
1840 ((const PNL_ATTR)a));
1841 if (status != NDIS_STATUS_SUCCESS) {
1842 dropReason = L"OVS-push MPLS action failed";
1843 goto dropit;
1844 }
1845 layers->l3Offset += MPLS_HLEN;
1846 layers->l4Offset += MPLS_HLEN;
1847 break;
1848 }
1849
1850 case OVS_ACTION_ATTR_POP_MPLS:
1851 {
1852 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1853 || ovsFwdCtx.tunnelRxNic != NULL) {
1854 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1855 if (status != NDIS_STATUS_SUCCESS) {
1856 dropReason = L"OVS-adding destination failed";
1857 goto dropit;
1858 }
1859 }
1860
1861 status = OvsActionMplsPop(&ovsFwdCtx, NlAttrGetBe16(a));
1862 if (status != NDIS_STATUS_SUCCESS) {
1863 dropReason = L"OVS-pop MPLS action failed";
1864 goto dropit;
1865 }
1866 layers->l3Offset -= MPLS_HLEN;
1867 layers->l4Offset -= MPLS_HLEN;
1868 break;
1869 }
1870
245eedef
SV
1871 case OVS_ACTION_ATTR_HASH:
1872 {
1873 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1874 || ovsFwdCtx.tunnelRxNic != NULL) {
1875 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1876 if (status != NDIS_STATUS_SUCCESS) {
1877 dropReason = L"OVS-adding destination failed";
1878 goto dropit;
1879 }
1880 }
1881
1882 OvsExecuteHash(key, (const PNL_ATTR)a);
1883
1884 break;
1885 }
1886
792d377d
SV
1887 case OVS_ACTION_ATTR_CT:
1888 {
1889 if (ovsFwdCtx.destPortsSizeOut > 0
1890 || ovsFwdCtx.tunnelTxNic != NULL
1891 || ovsFwdCtx.tunnelRxNic != NULL) {
1892 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1893 if (status != NDIS_STATUS_SUCCESS) {
1894 dropReason = L"OVS-adding destination failed";
1895 goto dropit;
1896 }
1897 }
1898
1899 status = OvsExecuteConntrackAction(ovsFwdCtx.curNbl, layers,
1900 key, (const PNL_ATTR)a);
1901 if (status != NDIS_STATUS_SUCCESS) {
1902 OVS_LOG_ERROR("CT Action failed");
1903 dropReason = L"OVS-conntrack action failed";
1904 goto dropit;
1905 }
1906 break;
1907 }
1908
ee25964a
SV
1909 case OVS_ACTION_ATTR_RECIRC:
1910 {
1911 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1912 || ovsFwdCtx.tunnelRxNic != NULL) {
1913 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1914 if (status != NDIS_STATUS_SUCCESS) {
1915 dropReason = L"OVS-adding destination failed";
1916 goto dropit;
1917 }
1918 }
1919
1920 status = OvsExecuteRecirc(&ovsFwdCtx, key, (const PNL_ATTR)a, rem);
1921 if (status != NDIS_STATUS_SUCCESS) {
1922 dropReason = L"OVS-recirculation action failed";
1923 goto dropit;
1924 }
1925
1926 if (NlAttrIsLast(a, rem)) {
1927 goto exit;
1928 }
1929 break;
1930 }
1931
c803536e
SS
1932 case OVS_ACTION_ATTR_USERSPACE:
1933 {
9d36ca82
SV
1934 status = OvsOutputUserspaceAction(&ovsFwdCtx, key,
1935 (const PNL_ATTR)a);
1936 if (status != NDIS_STATUS_SUCCESS) {
c803536e
SS
1937 dropReason = L"OVS-Dropped due to failure to queue to "
1938 L"userspace";
1939 goto dropit;
1940 }
9d36ca82
SV
1941 dropReason = L"OVS-Completed since packet was copied to "
1942 L"userspace";
c803536e
SS
1943 break;
1944 }
1945 case OVS_ACTION_ATTR_SET:
1946 {
1947 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1948 || ovsFwdCtx.tunnelRxNic != NULL) {
1949 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1950 if (status != NDIS_STATUS_SUCCESS) {
1951 dropReason = L"OVS-adding destination failed";
1952 goto dropit;
1953 }
1954 }
1955
1956 status = OvsExecuteSetAction(&ovsFwdCtx, key, hash,
d838e577
AS
1957 (const PNL_ATTR)NlAttrGet
1958 ((const PNL_ATTR)a));
c803536e
SS
1959 if (status != NDIS_STATUS_SUCCESS) {
1960 dropReason = L"OVS-set action failed";
1961 goto dropit;
1962 }
1963 break;
1964 }
1965 case OVS_ACTION_ATTR_SAMPLE:
9d36ca82
SV
1966 {
1967 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1968 || ovsFwdCtx.tunnelRxNic != NULL) {
1969 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1970 if (status != NDIS_STATUS_SUCCESS) {
1971 dropReason = L"OVS-adding destination failed";
1972 goto dropit;
1973 }
1974 }
1975
1976 status = OvsExecuteSampleAction(&ovsFwdCtx, key,
1977 (const PNL_ATTR)a);
1978 if (status != NDIS_STATUS_SUCCESS) {
1979 dropReason = L"OVS-sample action failed";
1980 goto dropit;
1981 }
1982 break;
1983 }
c803536e 1984 default:
7c5d9f17 1985 status = NDIS_STATUS_NOT_SUPPORTED;
c803536e
SS
1986 break;
1987 }
1988 }
1989
1990 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1991 || ovsFwdCtx.tunnelRxNic != NULL) {
1992 status = OvsOutputForwardingCtx(&ovsFwdCtx);
1993 ASSERT(ovsFwdCtx.curNbl == NULL);
1994 }
1995
1996 ASSERT(ovsFwdCtx.destPortsSizeOut == 0);
1997 ASSERT(ovsFwdCtx.tunnelRxNic == NULL);
1998 ASSERT(ovsFwdCtx.tunnelTxNic == NULL);
1999
2000dropit:
2001 /*
2002 * If curNbl != NULL, it implies the NBL has not been freed up so far.
2003 */
2004 if (ovsFwdCtx.curNbl) {
2005 OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason);
2006 }
2007
ee25964a
SV
2008exit:
2009 return status;
2010}
2011
2012/*
2013 * --------------------------------------------------------------------------
2014 * OvsActionsExecute --
2015 * The function interprets and executes the specified 'actions' on the
2016 * specified packet 'curNbl'. See 'OvsDoExecuteActions' description for
2017 * more details.
2018 *
2019 * Also executes deferred actions added by recirculation or sample
2020 * actions.
2021 * --------------------------------------------------------------------------
2022 */
2023NDIS_STATUS
2024OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
2025 OvsCompletionList *completionList,
2026 PNET_BUFFER_LIST curNbl,
2027 UINT32 portNo,
2028 ULONG sendFlags,
2029 OvsFlowKey *key,
2030 UINT64 *hash,
2031 OVS_PACKET_HDR_INFO *layers,
2032 const PNL_ATTR actions,
2033 INT actionsLen)
2034{
5278f698 2035 NDIS_STATUS status;
ee25964a
SV
2036
2037 status = OvsDoExecuteActions(switchContext, completionList, curNbl,
2038 portNo, sendFlags, key, hash, layers,
2039 actions, actionsLen);
2040
2041 if (status == STATUS_SUCCESS) {
2042 status = OvsProcessDeferredActions(switchContext, completionList,
2043 portNo, sendFlags, layers);
2044 }
2045
2046 return status;
2047}
2048
2049/*
2050 * --------------------------------------------------------------------------
2051 * OvsDoRecirc --
2052 * The function processes the packet 'curNbl' that re-entered datapath
2053 * packet processing after a recirculation action.
2054 * --------------------------------------------------------------------------
2055 */
2056NDIS_STATUS
2057OvsDoRecirc(POVS_SWITCH_CONTEXT switchContext,
2058 OvsCompletionList *completionList,
2059 PNET_BUFFER_LIST curNbl,
2060 OvsFlowKey *key,
2061 UINT32 srcPortNo,
2062 OVS_PACKET_HDR_INFO *layers)
2063{
5278f698
PB
2064 NDIS_STATUS status;
2065 OvsFlow *flow;
ee25964a
SV
2066 OvsForwardingContext ovsFwdCtx = { 0 };
2067 UINT64 hash = 0;
2068 ASSERT(layers);
2069
2070 OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl,
2071 srcPortNo, 0,
2072 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl),
2073 completionList, layers, TRUE);
2074
ee25964a
SV
2075 flow = OvsLookupFlow(&ovsFwdCtx.switchContext->datapath, key, &hash, FALSE);
2076 if (flow) {
2077 UINT32 level = OvsDeferredActionsLevelGet();
2078
2079 if (level > DEFERRED_ACTION_EXEC_LEVEL) {
2080 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
2081 L"OVS-Dropped due to deferred actions execution level limit \
2082 reached");
2083 ovsActionStats.deferredActionsExecLimit++;
2084 ovsFwdCtx.curNbl = NULL;
2085 return NDIS_STATUS_FAILURE;
2086 }
2087
2088 OvsFlowUsed(flow, ovsFwdCtx.curNbl, &ovsFwdCtx.layers);
2089 ovsFwdCtx.switchContext->datapath.hits++;
2090
2091 OvsDeferredActionsLevelInc();
2092
2093 status = OvsDoExecuteActions(ovsFwdCtx.switchContext,
2094 ovsFwdCtx.completionList,
2095 ovsFwdCtx.curNbl,
2096 ovsFwdCtx.srcVportNo,
2097 ovsFwdCtx.sendFlags,
2098 key, &hash, &ovsFwdCtx.layers,
2099 flow->actions, flow->actionsLen);
2100 ovsFwdCtx.curNbl = NULL;
2101
2102 OvsDeferredActionsLevelDec();
2103 } else {
2104 POVS_VPORT_ENTRY vport = NULL;
2105 LIST_ENTRY missedPackets;
2106 UINT32 num = 0;
2107
2108 ovsFwdCtx.switchContext->datapath.misses++;
2109 InitializeListHead(&missedPackets);
2110 vport = OvsFindVportByPortNo(switchContext, srcPortNo);
2111 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
2112 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
2113 L"OVS-Dropped due to port removal");
2114 ovsActionStats.noVport++;
2115 return NDIS_STATUS_SUCCESS;
2116 }
2117 status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS,
2118 vport, key, ovsFwdCtx.curNbl,
e97d67f4 2119 vport->portId ==
ee25964a
SV
2120 switchContext->virtualExternalPortId,
2121 &ovsFwdCtx.layers,
2122 ovsFwdCtx.switchContext,
2123 &missedPackets, &num);
2124 if (num) {
2125 OvsQueuePackets(&missedPackets, num);
2126 }
2127 if (status == NDIS_STATUS_SUCCESS) {
2128 /* Complete the packet since it was copied to user buffer. */
2129 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
2130 L"OVS-Dropped since packet was copied to userspace");
2131 ovsActionStats.flowMiss++;
2132 } else {
2133 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
2134 L"OVS-Dropped due to failure to queue to userspace");
2135 ovsActionStats.failedFlowMiss++;
2136 status = NDIS_STATUS_FAILURE;
2137 }
2138 }
2139
c803536e
SS
2140 return status;
2141}