]> git.proxmox.com Git - mirror_ovs.git/blame - datapath-windows/ovsext/Actions.c
datapath-windows: Clean up properly in case of driver init failure.
[mirror_ovs.git] / datapath-windows / ovsext / Actions.c
CommitLineData
c803536e
SS
1/*
2 * Copyright (c) 2014 VMware, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "precomp.h"
18
fa1324c9
SG
19#include "Switch.h"
20#include "Vport.h"
21#include "Event.h"
22#include "User.h"
23#include "NetProto.h"
24#include "Flow.h"
25#include "Vxlan.h"
022c2040 26#include "Stt.h"
fa1324c9
SG
27#include "Checksum.h"
28#include "PacketIO.h"
c803536e 29
c803536e
SS
30#ifdef OVS_DBG_MOD
31#undef OVS_DBG_MOD
32#endif
33#define OVS_DBG_MOD OVS_DBG_ACTION
fa1324c9 34#include "Debug.h"
c803536e
SS
35
36typedef struct _OVS_ACTION_STATS {
37 UINT64 rxVxlan;
38 UINT64 txVxlan;
022c2040
EE
39 UINT64 rxStt;
40 UINT64 txStt;
c803536e
SS
41 UINT64 flowMiss;
42 UINT64 flowUserspace;
43 UINT64 txTcp;
44 UINT32 failedFlowMiss;
45 UINT32 noVport;
46 UINT32 failedFlowExtract;
47 UINT32 noResource;
48 UINT32 noCopiedNbl;
49 UINT32 failedEncap;
50 UINT32 failedDecap;
51 UINT32 cannotGrowDest;
52 UINT32 zeroActionLen;
53 UINT32 failedChecksum;
54} OVS_ACTION_STATS, *POVS_ACTION_STATS;
55
56OVS_ACTION_STATS ovsActionStats;
57
58/*
59 * There a lot of data that needs to be maintained while executing the pipeline
60 * as dictated by the actions of a flow, across different functions at different
61 * levels. Such data is put together in a 'context' structure. Care should be
62 * exercised while adding new members to the structure - only add ones that get
63 * used across multiple stages in the pipeline/get used in multiple functions.
64 */
65#define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
66typedef struct OvsForwardingContext {
67 POVS_SWITCH_CONTEXT switchContext;
68 /* The NBL currently used in the pipeline. */
69 PNET_BUFFER_LIST curNbl;
70 /* NDIS forwarding detail for 'curNbl'. */
71 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
72 /* Array of destination ports for 'curNbl'. */
73 PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
74 /* send flags while sending 'curNbl' into NDIS. */
75 ULONG sendFlags;
76 /* Total number of output ports, used + unused, in 'curNbl'. */
77 UINT32 destPortsSizeIn;
78 /* Total number of used output ports in 'curNbl'. */
79 UINT32 destPortsSizeOut;
80 /*
81 * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
82 * be freed/completed.
83 */
84 OvsCompletionList *completionList;
85 /*
86 * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
87 * bridge. ie. during tunneling on the Rx side.
88 */
89 UINT32 srcVportNo;
90
91 /*
92 * Tunnel key:
93 * - specified in actions during tunneling Tx
94 * - extracted from an NBL during tunneling Rx
95 */
96 OvsIPv4TunnelKey tunKey;
97
98 /*
99 * Tunneling - Tx:
100 * To store the output port, when it is a tunneled port. We don't foresee
101 * multiple tunneled ports as outport for any given NBL.
102 */
103 POVS_VPORT_ENTRY tunnelTxNic;
104
105 /*
106 * Tunneling - Rx:
107 * Points to the Internal port on the PIF Bridge, if the packet needs to be
108 * de-tunneled.
109 */
110 POVS_VPORT_ENTRY tunnelRxNic;
111
112 /* header information */
113 OVS_PACKET_HDR_INFO layers;
114} OvsForwardingContext;
115
116
117/*
118 * --------------------------------------------------------------------------
119 * OvsInitForwardingCtx --
120 * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
121 * is being executed.
122 *
123 * Result:
124 * NDIS_STATUS_SUCCESS on success
125 * Other NDIS_STATUS upon failure. Upon failure, it is safe to call
126 * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
127 * enough for OvsCompleteNBLForwardingCtx() to do its work.
128 * --------------------------------------------------------------------------
129 */
130static __inline NDIS_STATUS
131OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx,
132 POVS_SWITCH_CONTEXT switchContext,
133 PNET_BUFFER_LIST curNbl,
134 UINT32 srcVportNo,
135 ULONG sendFlags,
136 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail,
137 OvsCompletionList *completionList,
138 OVS_PACKET_HDR_INFO *layers,
139 BOOLEAN resetTunnelInfo)
140{
141 ASSERT(ovsFwdCtx);
142 ASSERT(switchContext);
143 ASSERT(curNbl);
144 ASSERT(fwdDetail);
145
146 /*
147 * Set values for curNbl and switchContext so upon failures, we have enough
148 * information to do cleanup.
149 */
150 ovsFwdCtx->curNbl = curNbl;
151 ovsFwdCtx->switchContext = switchContext;
152 ovsFwdCtx->completionList = completionList;
153 ovsFwdCtx->fwdDetail = fwdDetail;
154
155 if (fwdDetail->NumAvailableDestinations > 0) {
156 /*
157 * XXX: even though MSDN says GetNetBufferListDestinations() returns
158 * NDIS_STATUS, the header files say otherwise.
159 */
160 switchContext->NdisSwitchHandlers.GetNetBufferListDestinations(
161 switchContext->NdisSwitchContext, curNbl,
162 &ovsFwdCtx->destinationPorts);
163
164 ASSERT(ovsFwdCtx->destinationPorts);
165 /* Ensure that none of the elements are consumed yet. */
166 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
167 fwdDetail->NumAvailableDestinations);
168 } else {
169 ovsFwdCtx->destinationPorts = NULL;
170 }
171 ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations;
172 ovsFwdCtx->destPortsSizeOut = 0;
173 ovsFwdCtx->srcVportNo = srcVportNo;
174 ovsFwdCtx->sendFlags = sendFlags;
175 if (layers) {
176 ovsFwdCtx->layers = *layers;
177 } else {
178 RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers);
179 }
180 if (resetTunnelInfo) {
181 ovsFwdCtx->tunnelTxNic = NULL;
182 ovsFwdCtx->tunnelRxNic = NULL;
183 RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey);
184 }
185
186 return NDIS_STATUS_SUCCESS;
187}
188
189/*
190 * --------------------------------------------------------------------------
191 * OvsDetectTunnelRxPkt --
192 * Utility function for an RX packet to detect its tunnel type.
193 *
194 * Result:
195 * True - if the tunnel type was detected.
196 * False - if not a tunnel packet or tunnel type not supported.
197 * --------------------------------------------------------------------------
198 */
199static __inline BOOLEAN
200OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
201 const OvsFlowKey *flowKey)
202{
203 POVS_VPORT_ENTRY tunnelVport = NULL;
204
205 /* XXX: we should also check for the length of the UDP payload to pick
206 * packets only if they are at least VXLAN header size.
207 */
208 if (!flowKey->ipKey.nwFrag &&
ffde5f8f 209 flowKey->ipKey.nwProto == IPPROTO_UDP) {
0b623ad5 210 UINT16 dstPort = ntohs(flowKey->ipKey.l4.tpDst);
ffde5f8f 211 tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
022c2040
EE
212 dstPort,
213 OVS_VPORT_TYPE_VXLAN);
214 if (tunnelVport) {
215 ovsActionStats.rxVxlan++;
216 }
217 } else if (!flowKey->ipKey.nwFrag &&
218 flowKey->ipKey.nwProto == IPPROTO_TCP) {
219 UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst);
220 tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
221 dstPort,
222 OVS_VPORT_TYPE_STT);
223 if (tunnelVport) {
224 ovsActionStats.rxStt++;
225 }
c803536e
SS
226 }
227
022c2040 228
c803536e
SS
229 // We might get tunnel packets even before the tunnel gets initialized.
230 if (tunnelVport) {
231 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
232 ovsFwdCtx->tunnelRxNic = tunnelVport;
233 return TRUE;
234 }
235
236 return FALSE;
237}
238
239/*
240 * --------------------------------------------------------------------------
241 * OvsDetectTunnelPkt --
b2d9d3e8
NR
242 * Utility function to detect if a packet is to be subjected to
243 * tunneling (Tx) or de-tunneling (Rx). Various factors such as source
244 * port, destination port, packet contents, and previously setup tunnel
245 * context are used.
c803536e
SS
246 *
247 * Result:
b2d9d3e8
NR
248 * True - If the packet is to be subjected to tunneling.
249 * In case of invalid tunnel context, the tunneling functionality is
250 * a no-op and is completed within this function itself by consuming
251 * all of the tunneling context.
252 * False - If not a tunnel packet or tunnel type not supported. Caller should
253 * process the packet as a non-tunnel packet.
c803536e
SS
254 * --------------------------------------------------------------------------
255 */
256static __inline BOOLEAN
257OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
258 const POVS_VPORT_ENTRY dstVport,
259 const OvsFlowKey *flowKey)
260{
c803536e 261 if (OvsIsInternalVportType(dstVport->ovsType)) {
b2d9d3e8
NR
262 /*
263 * Rx:
264 * The source of NBL during tunneling Rx could be the external
265 * port or if it is being executed from userspace, the source port is
266 * default port.
267 */
7434992b
NR
268 BOOLEAN validSrcPort =
269 (ovsFwdCtx->fwdDetail->SourcePortId ==
270 ovsFwdCtx->switchContext->virtualExternalPortId) ||
271 (ovsFwdCtx->fwdDetail->SourcePortId ==
272 NDIS_SWITCH_DEFAULT_PORT_ID);
c803536e
SS
273
274 if (validSrcPort && OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) {
275 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
276 ASSERT(ovsFwdCtx->tunnelRxNic != NULL);
277 return TRUE;
278 }
279 } else if (OvsIsTunnelVportType(dstVport->ovsType)) {
280 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
281 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
b2d9d3e8
NR
282
283 /*
284 * Tx:
285 * The destination port is a tunnel port. Encapsulation must be
30bc8153
NR
286 * performed only on packets that originate from:
287 * - a VIF port
288 * - a bridge-internal port (packets generated from userspace)
289 * - no port.
b2d9d3e8
NR
290 *
291 * If the packet will not be encapsulated, consume the tunnel context
292 * by clearing it.
293 */
429d4556
AS
294 if (ovsFwdCtx->srcVportNo != OVS_DEFAULT_PORT_NO) {
295
296 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(
297 ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
298
30bc8153
NR
299 if (!vport ||
300 (vport->ovsType != OVS_VPORT_TYPE_NETDEV &&
301 !OvsIsBridgeInternalVport(vport))) {
429d4556
AS
302 ovsFwdCtx->tunKey.dst = 0;
303 }
b2d9d3e8
NR
304 }
305
306 /* Tunnel the packet only if tunnel context is set. */
307 if (ovsFwdCtx->tunKey.dst != 0) {
022c2040
EE
308 switch(dstVport->ovsType) {
309 case OVS_VPORT_TYPE_VXLAN:
310 ovsActionStats.txVxlan++;
311 break;
312 case OVS_VPORT_TYPE_STT:
313 ovsActionStats.txStt++;
314 break;
315 }
b2d9d3e8
NR
316 ovsFwdCtx->tunnelTxNic = dstVport;
317 }
318
c803536e
SS
319 return TRUE;
320 }
321
322 return FALSE;
323}
324
325
326/*
327 * --------------------------------------------------------------------------
328 * OvsAddPorts --
329 * Add the specified destination vport into the forwarding context. If the
330 * vport is a VIF/external port, it is added directly to the NBL. If it is
331 * a tunneling port, it is NOT added to the NBL.
332 *
333 * Result:
334 * NDIS_STATUS_SUCCESS on success
335 * Other NDIS_STATUS upon failure.
336 * --------------------------------------------------------------------------
337 */
338static __inline NDIS_STATUS
339OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
340 OvsFlowKey *flowKey,
341 NDIS_SWITCH_PORT_ID dstPortId,
342 BOOLEAN preserveVLAN,
343 BOOLEAN preservePriority)
344{
345 POVS_VPORT_ENTRY vport;
346 PNDIS_SWITCH_PORT_DESTINATION fwdPort;
347 NDIS_STATUS status;
348 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
349
350 /*
351 * We hold the dispatch lock that protects the list of vports, so vports
352 * validated here can be added as destinations safely before we call into
353 * NDIS.
354 *
355 * Some of the vports can be tunnelled ports as well in which case
356 * they should be added to a separate list of tunnelled destination ports
357 * instead of the VIF ports. The context for the tunnel is settable
358 * in OvsForwardingContext.
359 */
360 vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId);
361 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
362 /*
363 * There may be some latency between a port disappearing, and userspace
364 * updating the recalculated flows. In the meantime, handle invalid
365 * ports gracefully.
366 */
367 ovsActionStats.noVport++;
368 return NDIS_STATUS_SUCCESS;
369 }
370 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
371 vport->stats.txPackets++;
372 vport->stats.txBytes +=
373 NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
374
7434992b
NR
375 if (OvsIsBridgeInternalVport(vport)) {
376 return NDIS_STATUS_SUCCESS;
377 }
378
c803536e 379 if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
c803536e
SS
380 return NDIS_STATUS_SUCCESS;
381 }
382
383 if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) {
384 if (ovsFwdCtx->destPortsSizeIn == 0) {
385 ASSERT(ovsFwdCtx->destinationPorts == NULL);
386 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
387 status =
388 switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
389 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
390 OVS_DEST_PORTS_ARRAY_MIN_SIZE,
391 &ovsFwdCtx->destinationPorts);
392 if (status != NDIS_STATUS_SUCCESS) {
393 ovsActionStats.cannotGrowDest++;
394 return status;
395 }
396 ovsFwdCtx->destPortsSizeIn =
397 ovsFwdCtx->fwdDetail->NumAvailableDestinations;
398 ASSERT(ovsFwdCtx->destinationPorts);
399 } else {
400 ASSERT(ovsFwdCtx->destinationPorts != NULL);
401 /*
402 * NumElements:
403 * A ULONG value that specifies the total number of
404 * NDIS_SWITCH_PORT_DESTINATION elements in the
405 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
406 *
407 * NumDestinations:
408 * A ULONG value that specifies the number of
409 * NDIS_SWITCH_PORT_DESTINATION elements in the
410 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
411 * specify port destinations.
412 *
413 * NumAvailableDestinations:
414 * A value that specifies the number of unused extensible switch
415 * destination ports elements within an NET_BUFFER_LIST structure.
416 */
417 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
418 ovsFwdCtx->destPortsSizeIn);
419 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
420 ovsFwdCtx->destPortsSizeOut -
421 ovsFwdCtx->fwdDetail->NumAvailableDestinations);
422 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0);
423 /*
424 * Before we grow the array of destination ports, the current set
425 * of ports needs to be committed. Only the ports added since the
426 * last commit need to be part of the new update.
427 */
428 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
429 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
430 ovsFwdCtx->fwdDetail->NumAvailableDestinations,
431 ovsFwdCtx->destinationPorts);
432 if (status != NDIS_STATUS_SUCCESS) {
433 ovsActionStats.cannotGrowDest++;
434 return status;
435 }
436 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
437 ovsFwdCtx->destPortsSizeIn);
438 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
439 ovsFwdCtx->destPortsSizeOut);
440 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
441
442 status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
443 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
444 ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts);
445 if (status != NDIS_STATUS_SUCCESS) {
446 ovsActionStats.cannotGrowDest++;
447 return status;
448 }
449 ASSERT(ovsFwdCtx->destinationPorts != NULL);
450 ovsFwdCtx->destPortsSizeIn <<= 1;
451 }
452 }
453
454 ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
455 fwdPort =
456 NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
457 ovsFwdCtx->destPortsSizeOut);
458
459 fwdPort->PortId = vport->portId;
460 fwdPort->NicIndex = vport->nicIndex;
461 fwdPort->IsExcluded = 0;
462 fwdPort->PreserveVLAN = preserveVLAN;
463 fwdPort->PreservePriority = preservePriority;
464 ovsFwdCtx->destPortsSizeOut += 1;
465
466 return NDIS_STATUS_SUCCESS;
467}
468
469
470/*
471 * --------------------------------------------------------------------------
472 * OvsClearTunTxCtx --
473 * Utility function to clear tx tunneling context.
474 * --------------------------------------------------------------------------
475 */
476static __inline VOID
477OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx)
478{
479 ovsFwdCtx->tunnelTxNic = NULL;
480 ovsFwdCtx->tunKey.dst = 0;
481}
482
483
484/*
485 * --------------------------------------------------------------------------
486 * OvsClearTunRxCtx --
487 * Utility function to clear rx tunneling context.
488 * --------------------------------------------------------------------------
489 */
490static __inline VOID
491OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx)
492{
493 ovsFwdCtx->tunnelRxNic = NULL;
494 ovsFwdCtx->tunKey.dst = 0;
495}
496
497
498/*
499 * --------------------------------------------------------------------------
500 * OvsCompleteNBLForwardingCtx --
501 * This utility function is responsible for freeing/completing an NBL - either
502 * by adding it to a completion list or by freeing it.
503 *
504 * Side effects:
505 * It also resets the necessary fields in 'ovsFwdCtx'.
506 * --------------------------------------------------------------------------
507 */
508static __inline VOID
509OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx,
510 PCWSTR dropReason)
511{
512 NDIS_STRING filterReason;
513
514 RtlInitUnicodeString(&filterReason, dropReason);
515 if (ovsFwdCtx->completionList) {
516 OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE,
517 ovsFwdCtx->fwdDetail->SourcePortId, ovsFwdCtx->curNbl, 1,
518 &filterReason);
519 ovsFwdCtx->curNbl = NULL;
520 } else {
521 /* If there is no completionList, we assume this is ovs created NBL */
522 ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext,
523 ovsFwdCtx->curNbl, TRUE);
524 ASSERT(ovsFwdCtx->curNbl == NULL);
525 }
526 /* XXX: these can be made debug only to save cycles. Ideally the pipeline
527 * using these fields should reset the values at the end of the pipeline. */
528 ovsFwdCtx->destPortsSizeOut = 0;
529 ovsFwdCtx->tunnelTxNic = NULL;
530 ovsFwdCtx->tunnelRxNic = NULL;
531}
532
533/*
534 * --------------------------------------------------------------------------
535 * OvsDoFlowLookupOutput --
536 * Function to be used for the second stage of a tunneling workflow, ie.:
537 * - On the encapsulated packet on Tx path, to do a flow extract, flow
538 * lookup and excuting the actions.
539 * - On the decapsulated packet on Rx path, to do a flow extract, flow
540 * lookup and excuting the actions.
541 *
542 * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
543 * until the new buffer management framework is adopted.
544 *
545 * Side effects:
546 * The NBL in 'ovsFwdCtx' is consumed.
547 * --------------------------------------------------------------------------
548 */
549static __inline NDIS_STATUS
550OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
551{
552 OvsFlowKey key;
553 OvsFlow *flow;
554 UINT64 hash;
555 NDIS_STATUS status;
556 POVS_VPORT_ENTRY vport =
557 OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
558 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
559 ASSERT(FALSE); // XXX: let's catch this for now
560 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
561 L"OVS-Dropped due to internal/tunnel port removal");
562 ovsActionStats.noVport++;
563 return NDIS_STATUS_SUCCESS;
564 }
565 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
566
567 /* Assert that in the Rx direction, key is always setup. */
568 ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
569 status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
570 &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ?
571 &ovsFwdCtx->tunKey : NULL);
572 if (status != NDIS_STATUS_SUCCESS) {
573 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
574 L"OVS-Flow extract failed");
575 ovsActionStats.failedFlowExtract++;
576 return status;
577 }
578
579 flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
580 if (flow) {
581 OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
582 ovsFwdCtx->switchContext->datapath.hits++;
583 status = OvsActionsExecute(ovsFwdCtx->switchContext,
584 ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
585 ovsFwdCtx->srcVportNo, ovsFwdCtx->sendFlags,
586 &key, &hash, &ovsFwdCtx->layers,
587 flow->actions, flow->actionsLen);
588 ovsFwdCtx->curNbl = NULL;
589 } else {
590 LIST_ENTRY missedPackets;
591 UINT32 num = 0;
592 ovsFwdCtx->switchContext->datapath.misses++;
593 InitializeListHead(&missedPackets);
640ebde7
EE
594 status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS,
595 ovsFwdCtx->srcVportNo,
596 &key,ovsFwdCtx->curNbl,
597 ovsFwdCtx->tunnelRxNic != NULL, &ovsFwdCtx->layers,
598 ovsFwdCtx->switchContext, &missedPackets, &num);
c803536e 599 if (num) {
4a3c9b70 600 OvsQueuePackets(&missedPackets, num);
c803536e
SS
601 }
602 if (status == NDIS_STATUS_SUCCESS) {
603 /* Complete the packet since it was copied to user buffer. */
604 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
605 L"OVS-Dropped since packet was copied to userspace");
606 ovsActionStats.flowMiss++;
607 status = NDIS_STATUS_SUCCESS;
608 } else {
609 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
610 L"OVS-Dropped due to failure to queue to userspace");
611 status = NDIS_STATUS_FAILURE;
612 ovsActionStats.failedFlowMiss++;
613 }
614 }
615
616 return status;
617}
618
619/*
620 * --------------------------------------------------------------------------
621 * OvsTunnelPortTx --
622 * The start function for Tx tunneling - encapsulates the packet, and
623 * outputs the packet on the PIF bridge.
624 *
625 * Side effects:
626 * The NBL in 'ovsFwdCtx' is consumed.
627 * --------------------------------------------------------------------------
628 */
629static __inline NDIS_STATUS
630OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
631{
632 NDIS_STATUS status = NDIS_STATUS_FAILURE;
633 PNET_BUFFER_LIST newNbl = NULL;
634
635 /*
636 * Setup the source port to be the internal port to as to facilitate the
637 * second OvsLookupFlow.
638 */
022c2040
EE
639 if (ovsFwdCtx->switchContext->internalVport == NULL ||
640 ovsFwdCtx->switchContext->virtualExternalVport == NULL) {
ad0d70d2
EE
641 OvsClearTunTxCtx(ovsFwdCtx);
642 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
022c2040 643 L"OVS-Dropped since either internal or external port is absent");
ad0d70d2
EE
644 return NDIS_STATUS_FAILURE;
645 }
c803536e
SS
646 ovsFwdCtx->srcVportNo =
647 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo;
648
649 ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId;
650 ovsFwdCtx->fwdDetail->SourceNicIndex =
651 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex;
652
653 /* Do the encap. Encap function does not consume the NBL. */
654 switch(ovsFwdCtx->tunnelTxNic->ovsType) {
e00afcf6 655 case OVS_VPORT_TYPE_VXLAN:
0b623ad5
NR
656 status = OvsEncapVxlan(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
657 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
c803536e
SS
658 &ovsFwdCtx->layers, &newNbl);
659 break;
022c2040
EE
660 case OVS_VPORT_TYPE_STT:
661 status = OvsEncapStt(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
0b623ad5 662 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
022c2040
EE
663 &ovsFwdCtx->layers, &newNbl);
664 break;
c803536e
SS
665 default:
666 ASSERT(! "Tx: Unhandled tunnel type");
667 }
668
669 /* Reset the tunnel context so that it doesn't get used after this point. */
670 OvsClearTunTxCtx(ovsFwdCtx);
671
672 if (status == NDIS_STATUS_SUCCESS) {
673 ASSERT(newNbl);
674 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
675 L"Complete after cloning NBL for encapsulation");
676 ovsFwdCtx->curNbl = newNbl;
677 status = OvsDoFlowLookupOutput(ovsFwdCtx);
678 ASSERT(ovsFwdCtx->curNbl == NULL);
679 } else {
680 /*
681 * XXX: Temporary freeing of the packet until we register a
682 * callback to IP helper.
683 */
684 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
685 L"OVS-Dropped due to encap failure");
686 ovsActionStats.failedEncap++;
687 status = NDIS_STATUS_SUCCESS;
688 }
689
690 return status;
691}
692
693/*
694 * --------------------------------------------------------------------------
695 * OvsTunnelPortRx --
696 * Decapsulate the incoming NBL based on the tunnel type and goes through
697 * the flow lookup for the inner packet.
698 *
699 * Note: IP checksum is validate here, but L4 checksum validation needs
700 * to be done by the corresponding tunnel types.
701 *
702 * Side effects:
703 * The NBL in 'ovsFwdCtx' is consumed.
704 * --------------------------------------------------------------------------
705 */
706static __inline NDIS_STATUS
707OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
708{
709 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
710 PNET_BUFFER_LIST newNbl = NULL;
711 POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
712
713 if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
714 != NDIS_STATUS_SUCCESS) {
715 ovsActionStats.failedChecksum++;
716 OVS_LOG_INFO("Packet dropped due to IP checksum failure.");
717 goto dropNbl;
718 }
719
022c2040
EE
720 /*
721 * Decap port functions should return a new NBL if it was copied, and
722 * this new NBL should be setup as the ovsFwdCtx->curNbl.
723 */
724
c803536e 725 switch(tunnelRxVport->ovsType) {
e00afcf6 726 case OVS_VPORT_TYPE_VXLAN:
022c2040
EE
727 status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
728 &ovsFwdCtx->tunKey, &newNbl);
729 break;
730 case OVS_VPORT_TYPE_STT:
731 status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
732 &ovsFwdCtx->tunKey, &newNbl);
c803536e
SS
733 break;
734 default:
735 OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
736 tunnelRxVport->ovsType);
737 ASSERT(! "Rx: Unhandled tunnel type");
738 status = NDIS_STATUS_NOT_SUPPORTED;
739 }
740
741 if (status != NDIS_STATUS_SUCCESS) {
742 ovsActionStats.failedDecap++;
743 goto dropNbl;
744 }
745
746 /*
747 * tunnelRxNic and other fields will be cleared, re-init the context
748 * before usage.
749 */
750 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
751 L"OVS-dropped due to new decap packet");
752
753 /* Decapsulated packet is in a new NBL */
754 ovsFwdCtx->tunnelRxNic = tunnelRxVport;
755 OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
756 newNbl, tunnelRxVport->portNo, 0,
757 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
758 ovsFwdCtx->completionList,
759 &ovsFwdCtx->layers, FALSE);
760
761 /*
762 * Set the NBL's SourcePortId and SourceNicIndex to default values to
763 * keep NDIS happy when we forward the packet.
764 */
765 ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
766 ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
767
768 status = OvsDoFlowLookupOutput(ovsFwdCtx);
769 ASSERT(ovsFwdCtx->curNbl == NULL);
770 OvsClearTunRxCtx(ovsFwdCtx);
771
772 return status;
773
774dropNbl:
775 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
776 L"OVS-dropped due to decap failure");
777 OvsClearTunRxCtx(ovsFwdCtx);
778 return status;
779}
780
781
782/*
783 * --------------------------------------------------------------------------
784 * OvsOutputForwardingCtx --
785 * This function outputs an NBL to NDIS or to a tunneling pipeline based on
786 * the ports added so far into 'ovsFwdCtx'.
787 *
788 * Side effects:
789 * This function consumes the NBL - either by forwarding it successfully to
790 * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it.
791 *
792 * Also makes sure that the list of destination ports - tunnel or otherwise is
793 * drained.
794 * --------------------------------------------------------------------------
795 */
796static __inline NDIS_STATUS
797OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx)
798{
799 NDIS_STATUS status = STATUS_SUCCESS;
800 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
eda457f1 801 PCWSTR dropReason;
c803536e
SS
802
803 /*
804 * Handle the case where the some of the destination ports are tunneled
805 * ports - the non-tunneled ports get a unmodified copy of the NBL, and the
806 * tunneling pipeline starts when we output the packet to tunneled port.
807 */
808 if (ovsFwdCtx->destPortsSizeOut > 0) {
809 PNET_BUFFER_LIST newNbl = NULL;
810 PNET_BUFFER nb;
811 UINT32 portsToUpdate =
812 ovsFwdCtx->fwdDetail->NumAvailableDestinations -
813 (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut);
814
815 ASSERT(ovsFwdCtx->destinationPorts != NULL);
816
817 /*
818 * Create a copy of the packet in order to do encap on it later. Also,
819 * don't copy the offload context since the encap'd packet has a
820 * different set of headers. This will change when we implement offloads
821 * before doing encapsulation.
822 */
823 if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) {
824 nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
825 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
826 0, 0, TRUE /*copy NBL info*/);
827 if (newNbl == NULL) {
828 status = NDIS_STATUS_RESOURCES;
829 ovsActionStats.noCopiedNbl++;
eda457f1 830 dropReason = L"Dropped due to failure to create NBL copy.";
c803536e
SS
831 goto dropit;
832 }
833 }
834
835 /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */
836 ASSERT(portsToUpdate > 0);
837 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
838 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
839 portsToUpdate, ovsFwdCtx->destinationPorts);
840 if (status != NDIS_STATUS_SUCCESS) {
841 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
842 ovsActionStats.cannotGrowDest++;
eda457f1 843 dropReason = L"Dropped due to failure to update destinations.";
c803536e
SS
844 goto dropit;
845 }
846
847 OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
848 ovsFwdCtx->sendFlags);
849 /* End this pipeline by resetting the corresponding context. */
850 ovsFwdCtx->destPortsSizeOut = 0;
851 ovsFwdCtx->curNbl = NULL;
852 if (newNbl) {
853 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
854 newNbl, ovsFwdCtx->srcVportNo, 0,
855 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
856 ovsFwdCtx->completionList,
857 &ovsFwdCtx->layers, FALSE);
858 if (status != NDIS_STATUS_SUCCESS) {
eda457f1 859 dropReason = L"Dropped due to resouces.";
c803536e
SS
860 goto dropit;
861 }
862 }
863 }
864
865 if (ovsFwdCtx->tunnelTxNic != NULL) {
866 status = OvsTunnelPortTx(ovsFwdCtx);
867 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
868 ASSERT(ovsFwdCtx->tunKey.dst == 0);
869 } else if (ovsFwdCtx->tunnelRxNic != NULL) {
870 status = OvsTunnelPortRx(ovsFwdCtx);
871 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
872 ASSERT(ovsFwdCtx->tunKey.dst == 0);
873 }
874 ASSERT(ovsFwdCtx->curNbl == NULL);
875
876 return status;
877
878dropit:
879 if (status != NDIS_STATUS_SUCCESS) {
eda457f1 880 OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
c803536e
SS
881 }
882
883 return status;
884}
885
886
887/*
888 * --------------------------------------------------------------------------
889 * OvsLookupFlowOutput --
890 * Utility function for external callers to do flow extract, lookup,
891 * actions execute on a given NBL.
892 *
893 * Note: If this is being used from a callback function, make sure that the
894 * arguments specified are still valid in the asynchronous context.
895 *
896 * Side effects:
897 * This function consumes the NBL.
898 * --------------------------------------------------------------------------
899 */
900VOID
901OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext,
902 VOID *compList,
903 PNET_BUFFER_LIST curNbl)
904{
905 NDIS_STATUS status;
906 OvsForwardingContext ovsFwdCtx;
907 POVS_VPORT_ENTRY internalVport =
908 (POVS_VPORT_ENTRY)switchContext->internalVport;
909
910 /* XXX: make sure comp list was not a stack variable previously. */
911 OvsCompletionList *completionList = (OvsCompletionList *)compList;
912
913 /*
914 * XXX: can internal port disappear while we are busy doing ARP resolution?
915 * It could, but will we get this callback from IP helper in that case. Need
916 * to check.
917 */
918 ASSERT(switchContext->internalVport);
919 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl,
920 internalVport->portNo, 0,
921 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl),
922 completionList, NULL, TRUE);
923 if (status != NDIS_STATUS_SUCCESS) {
924 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
925 L"OVS-Dropped due to resources");
926 return;
927 }
928
929 ASSERT(FALSE);
930 /*
931 * XXX: We need to acquire the dispatch lock and the datapath lock.
932 */
933
934 OvsDoFlowLookupOutput(&ovsFwdCtx);
935}
936
937
938/*
939 * --------------------------------------------------------------------------
940 * OvsOutputBeforeSetAction --
941 * Function to be called to complete one set of actions on an NBL, before
942 * we start the next one.
943 * --------------------------------------------------------------------------
944 */
945static __inline NDIS_STATUS
946OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx)
947{
948 PNET_BUFFER_LIST newNbl;
949 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
950 PNET_BUFFER nb;
951
952 /*
953 * Create a copy and work on the copy after this point. The original NBL is
954 * forwarded. One reason to not use the copy for forwarding is that
955 * ports have already been added to the original NBL, and it might be
956 * inefficient/impossible to remove/re-add them to the copy. There's no
957 * notion of removing the ports, the ports need to be marked as
958 * "isExcluded". There's seems no real advantage to retaining the original
959 * and sending out the copy instead.
960 *
961 * XXX: We are copying the offload context here. This is to handle actions
962 * such as:
963 * outport, pop_vlan(), outport, push_vlan(), outport
964 *
965 * copy size needs to include inner ether + IP + TCP, need to revisit
966 * if we support IP options.
967 * XXX Head room needs to include the additional encap.
968 * XXX copySize check is not considering multiple NBs.
969 */
970 nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
971 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
972 0, 0, TRUE /*copy NBL info*/);
973
974 ASSERT(ovsFwdCtx->destPortsSizeOut > 0 ||
975 ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL);
976
977 /* Send the original packet out */
978 status = OvsOutputForwardingCtx(ovsFwdCtx);
979 ASSERT(ovsFwdCtx->curNbl == NULL);
980 ASSERT(ovsFwdCtx->destPortsSizeOut == 0);
981 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
982 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
983
984 /* If we didn't make a copy, can't continue. */
985 if (newNbl == NULL) {
986 ovsActionStats.noCopiedNbl++;
987 return NDIS_STATUS_RESOURCES;
988 }
989
990 /* Finish the remaining actions with the new NBL */
991 if (status != NDIS_STATUS_SUCCESS) {
992 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
993 } else {
994 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
995 newNbl, ovsFwdCtx->srcVportNo, 0,
996 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
997 ovsFwdCtx->completionList,
998 &ovsFwdCtx->layers, FALSE);
999 }
1000
1001 return status;
1002}
1003
1004
1005/*
1006 * --------------------------------------------------------------------------
1007 * OvsPopVlanInPktBuf --
1008 * Function to pop a VLAN tag when the tag is in the packet buffer.
1009 * --------------------------------------------------------------------------
1010 */
1011static __inline NDIS_STATUS
1012OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
1013{
1014 PNET_BUFFER curNb;
1015 PMDL curMdl;
1016 PUINT8 bufferStart;
1017 ULONG dataLength = sizeof (DL_EUI48) + sizeof (DL_EUI48);
1018 UINT32 packetLen, mdlLen;
1019 PNET_BUFFER_LIST newNbl;
1020 NDIS_STATUS status;
1021
1022 /*
1023 * Declare a dummy vlanTag structure since we need to compute the size
1024 * of shiftLength. The NDIS one is a unionized structure.
1025 */
1026 NDIS_PACKET_8021Q_INFO vlanTag = {0};
1027 ULONG shiftLength = sizeof (vlanTag.TagHeader);
1028 PUINT8 tempBuffer[sizeof (DL_EUI48) + sizeof (DL_EUI48)];
1029
1030 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1031 0, 0, TRUE /* copy NBL info */);
1032 if (!newNbl) {
1033 ovsActionStats.noCopiedNbl++;
1034 return NDIS_STATUS_RESOURCES;
1035 }
1036
1037 /* Complete the original NBL and create a copy to modify. */
1038 OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy");
1039
1040 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1041 newNbl, ovsFwdCtx->srcVportNo, 0,
1042 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1043 NULL, &ovsFwdCtx->layers, FALSE);
1044 if (status != NDIS_STATUS_SUCCESS) {
1045 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1046 L"Dropped due to resouces");
1047 return NDIS_STATUS_RESOURCES;
1048 }
1049
1050 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1051 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1052 ASSERT(curNb->Next == NULL);
1053 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1054 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1055 if (!bufferStart) {
1056 return NDIS_STATUS_RESOURCES;
1057 }
1058 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1059 /* Bail out if L2 + VLAN header is not contiguous in the first buffer. */
1060 if (MIN(packetLen, mdlLen) < sizeof (EthHdr) + shiftLength) {
1061 ASSERT(FALSE);
1062 return NDIS_STATUS_FAILURE;
1063 }
1064 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1065 RtlCopyMemory(tempBuffer, bufferStart, dataLength);
1066 RtlCopyMemory(bufferStart + shiftLength, tempBuffer, dataLength);
1067 NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL);
1068
1069 return NDIS_STATUS_SUCCESS;
1070}
1071
1072/*
1073 * --------------------------------------------------------------------------
1074 * OvsTunnelAttrToIPv4TunnelKey --
1075 * Convert tunnel attribute to OvsIPv4TunnelKey.
1076 * --------------------------------------------------------------------------
1077 */
1078static __inline NDIS_STATUS
d838e577 1079OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr,
c803536e
SS
1080 OvsIPv4TunnelKey *tunKey)
1081{
d838e577 1082 PNL_ATTR a;
c803536e
SS
1083 INT rem;
1084
1085 tunKey->attr[0] = 0;
1086 tunKey->attr[1] = 0;
1087 tunKey->attr[2] = 0;
d838e577 1088 ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL);
c803536e 1089
d838e577
AS
1090 NL_ATTR_FOR_EACH_UNSAFE (a, rem, NlAttrData(attr),
1091 NlAttrGetSize(attr)) {
1092 switch (NlAttrType(a)) {
c803536e 1093 case OVS_TUNNEL_KEY_ATTR_ID:
d838e577 1094 tunKey->tunnelId = NlAttrGetBe64(a);
c803536e
SS
1095 tunKey->flags |= OVS_TNL_F_KEY;
1096 break;
1097 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
d838e577 1098 tunKey->src = NlAttrGetBe32(a);
c803536e
SS
1099 break;
1100 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
d838e577 1101 tunKey->dst = NlAttrGetBe32(a);
c803536e
SS
1102 break;
1103 case OVS_TUNNEL_KEY_ATTR_TOS:
d838e577 1104 tunKey->tos = NlAttrGetU8(a);
c803536e
SS
1105 break;
1106 case OVS_TUNNEL_KEY_ATTR_TTL:
d838e577 1107 tunKey->ttl = NlAttrGetU8(a);
c803536e
SS
1108 break;
1109 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
1110 tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT;
1111 break;
1112 case OVS_TUNNEL_KEY_ATTR_CSUM:
1113 tunKey->flags |= OVS_TNL_F_CSUM;
1114 break;
1115 default:
1116 ASSERT(0);
1117 }
1118 }
1119
1120 return NDIS_STATUS_SUCCESS;
1121}
1122
1123/*
1124 *----------------------------------------------------------------------------
1125 * OvsUpdateEthHeader --
1126 * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the
1127 * specified key.
1128 *----------------------------------------------------------------------------
1129 */
1130static __inline NDIS_STATUS
1131OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx,
1132 const struct ovs_key_ethernet *ethAttr)
1133{
1134 PNET_BUFFER curNb;
1135 PMDL curMdl;
1136 PUINT8 bufferStart;
1137 EthHdr *ethHdr;
1138 UINT32 packetLen, mdlLen;
1139
1140 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1141 ASSERT(curNb->Next == NULL);
1142 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1143 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1144 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1145 if (!bufferStart) {
1146 ovsActionStats.noResource++;
1147 return NDIS_STATUS_RESOURCES;
1148 }
1149 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1150 ASSERT(mdlLen > 0);
1151 /* Bail out if the L2 header is not in a contiguous buffer. */
1152 if (MIN(packetLen, mdlLen) < sizeof *ethHdr) {
1153 ASSERT(FALSE);
1154 return NDIS_STATUS_FAILURE;
1155 }
1156 ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb));
1157
1158 RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst,
1159 sizeof ethHdr->Destination);
1160 RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source);
1161
1162 return NDIS_STATUS_SUCCESS;
1163}
1164
1165/*
1166 *----------------------------------------------------------------------------
1167 * OvsUpdateIPv4Header --
1168 * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
1169 * specified key.
1170 *----------------------------------------------------------------------------
1171 */
1172static __inline NDIS_STATUS
1173OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
1174 const struct ovs_key_ipv4 *ipAttr)
1175{
1176 PNET_BUFFER curNb;
1177 PMDL curMdl;
1178 ULONG curMdlOffset;
1179 PUINT8 bufferStart;
1180 UINT32 mdlLen, hdrSize, packetLen;
1181 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1182 NDIS_STATUS status;
1183 IPHdr *ipHdr;
1184 TCPHdr *tcpHdr = NULL;
1185 UDPHdr *udpHdr = NULL;
1186
1187 ASSERT(layers->value != 0);
1188
1189 /*
1190 * Peek into the MDL to get a handle to the IP header and if required
1191 * the TCP/UDP header as well. We check if the required headers are in one
1192 * contiguous MDL, and if not, we copy them over to one MDL.
1193 */
1194 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1195 ASSERT(curNb->Next == NULL);
1196 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1197 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1198 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1199 if (!bufferStart) {
1200 ovsActionStats.noResource++;
1201 return NDIS_STATUS_RESOURCES;
1202 }
1203 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1204 mdlLen -= curMdlOffset;
1205 ASSERT((INT)mdlLen >= 0);
1206
1207 if (layers->isTcp || layers->isUdp) {
1208 hdrSize = layers->l4Offset +
1209 layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
1210 } else {
1211 hdrSize = layers->l3Offset + sizeof (*ipHdr);
1212 }
1213
1214 /* Count of number of bytes of valid data there are in the first MDL. */
1215 mdlLen = MIN(packetLen, mdlLen);
1216 if (mdlLen < hdrSize) {
1217 PNET_BUFFER_LIST newNbl;
1218 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1219 hdrSize, 0, TRUE /*copy NBL info*/);
1220 if (!newNbl) {
1221 ovsActionStats.noCopiedNbl++;
1222 return NDIS_STATUS_RESOURCES;
1223 }
1224 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1225 L"Complete after partial copy.");
1226
1227 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1228 newNbl, ovsFwdCtx->srcVportNo, 0,
1229 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1230 NULL, &ovsFwdCtx->layers, FALSE);
1231 if (status != NDIS_STATUS_SUCCESS) {
1232 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1233 L"OVS-Dropped due to resources");
1234 return NDIS_STATUS_RESOURCES;
1235 }
1236
1237 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1238 ASSERT(curNb->Next == NULL);
1239 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1240 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1241 if (!curMdl) {
1242 ovsActionStats.noResource++;
1243 return NDIS_STATUS_RESOURCES;
1244 }
1245 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1246 mdlLen -= curMdlOffset;
1247 ASSERT(mdlLen >= hdrSize);
1248 }
1249
1250 ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset);
1251
1252 if (layers->isTcp) {
1253 tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1254 } else if (layers->isUdp) {
1255 udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1256 }
1257
1258 /*
1259 * Adjust the IP header inline as dictated by the action, nad also update
1260 * the IP and the TCP checksum for the data modified.
1261 *
1262 * In the future, this could be optimized to make one call to
1263 * ChecksumUpdate32(). Ignoring this for now, since for the most common
1264 * case, we only update the TTL.
1265 */
1266 if (ipHdr->saddr != ipAttr->ipv4_src) {
1267 if (tcpHdr) {
1268 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr,
1269 ipAttr->ipv4_src);
1270 } else if (udpHdr && udpHdr->check) {
1271 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr,
1272 ipAttr->ipv4_src);
1273 }
1274
1275 if (ipHdr->check != 0) {
1276 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr,
1277 ipAttr->ipv4_src);
1278 }
1279 ipHdr->saddr = ipAttr->ipv4_src;
1280 }
1281 if (ipHdr->daddr != ipAttr->ipv4_dst) {
1282 if (tcpHdr) {
1283 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr,
1284 ipAttr->ipv4_dst);
1285 } else if (udpHdr && udpHdr->check) {
1286 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr,
1287 ipAttr->ipv4_dst);
1288 }
1289
1290 if (ipHdr->check != 0) {
1291 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr,
1292 ipAttr->ipv4_dst);
1293 }
1294 ipHdr->daddr = ipAttr->ipv4_dst;
1295 }
1296 if (ipHdr->protocol != ipAttr->ipv4_proto) {
1297 UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00;
1298 UINT16 newProto = (ipAttr->ipv4_proto << 16) & 0xff00;
1299 if (tcpHdr) {
1300 tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto);
1301 } else if (udpHdr && udpHdr->check) {
1302 udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto);
1303 }
1304
1305 if (ipHdr->check != 0) {
1306 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto);
1307 }
1308 ipHdr->protocol = ipAttr->ipv4_proto;
1309 }
1310 if (ipHdr->ttl != ipAttr->ipv4_ttl) {
1311 UINT16 oldTtl = (ipHdr->ttl) & 0xff;
1312 UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff;
1313 if (ipHdr->check != 0) {
1314 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl);
1315 }
1316 ipHdr->ttl = ipAttr->ipv4_ttl;
1317 }
1318
1319 return NDIS_STATUS_SUCCESS;
1320}
1321
1322/*
1323 * --------------------------------------------------------------------------
1324 * OvsExecuteSetAction --
1325 * Executes a set() action, but storing the actions into 'ovsFwdCtx'
1326 * --------------------------------------------------------------------------
1327 */
1328static __inline NDIS_STATUS
1329OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx,
1330 OvsFlowKey *key,
1331 UINT64 *hash,
d838e577 1332 const PNL_ATTR a)
c803536e 1333{
d838e577 1334 enum ovs_key_attr type = NlAttrType(a);
c803536e
SS
1335 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1336
1337 switch (type) {
1338 case OVS_KEY_ATTR_ETHERNET:
1339 status = OvsUpdateEthHeader(ovsFwdCtx,
d838e577 1340 NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet)));
c803536e
SS
1341 break;
1342
1343 case OVS_KEY_ATTR_IPV4:
1344 status = OvsUpdateIPv4Header(ovsFwdCtx,
d838e577 1345 NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4)));
c803536e
SS
1346 break;
1347
1348 case OVS_KEY_ATTR_TUNNEL:
1349 {
1350 OvsIPv4TunnelKey tunKey;
022c2040 1351 status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey);
c803536e
SS
1352 ASSERT(status == NDIS_STATUS_SUCCESS);
1353 tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key));
ffde5f8f 1354 tunKey.dst_port = key->ipKey.l4.tpDst;
c803536e 1355 RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey);
c803536e
SS
1356 break;
1357 }
1358 case OVS_KEY_ATTR_SKB_MARK:
1359 /* XXX: Not relevant to Hyper-V. Return OK */
1360 break;
1361 case OVS_KEY_ATTR_UNSPEC:
1362 case OVS_KEY_ATTR_ENCAP:
1363 case OVS_KEY_ATTR_ETHERTYPE:
1364 case OVS_KEY_ATTR_IN_PORT:
1365 case OVS_KEY_ATTR_VLAN:
1366 case OVS_KEY_ATTR_ICMP:
1367 case OVS_KEY_ATTR_ICMPV6:
1368 case OVS_KEY_ATTR_ARP:
1369 case OVS_KEY_ATTR_ND:
1370 case __OVS_KEY_ATTR_MAX:
1371 default:
1372 OVS_LOG_INFO("Unhandled attribute %#x", type);
1373 ASSERT(FALSE);
1374 }
1375 return status;
1376}
1377
1378/*
1379 * --------------------------------------------------------------------------
1380 * OvsActionsExecute --
1381 * Interpret and execute the specified 'actions' on the specifed packet
1382 * 'curNbl'. The expectation is that if the packet needs to be dropped
1383 * (completed) for some reason, it is added to 'completionList' so that the
1384 * caller can complete the packet. If 'completionList' is NULL, the NBL is
1385 * assumed to be generated by OVS and freed up. Otherwise, the function
1386 * consumes the NBL by generating a NDIS send indication for the packet.
1387 *
1388 * There are one or more of "clone" NBLs that may get generated while
1389 * executing the actions. Upon any failures, the "cloned" NBLs are freed up,
1390 * and the caller does not have to worry about them.
1391 *
1392 * Success or failure is returned based on whether the specified actions
1393 * were executed successfully on the packet or not.
1394 * --------------------------------------------------------------------------
1395 */
1396NDIS_STATUS
1397OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
1398 OvsCompletionList *completionList,
1399 PNET_BUFFER_LIST curNbl,
1400 UINT32 portNo,
1401 ULONG sendFlags,
1402 OvsFlowKey *key,
1403 UINT64 *hash,
1404 OVS_PACKET_HDR_INFO *layers,
d838e577 1405 const PNL_ATTR actions,
c803536e
SS
1406 INT actionsLen)
1407{
d838e577 1408 PNL_ATTR a;
c803536e
SS
1409 INT rem;
1410 UINT32 dstPortID;
1411 OvsForwardingContext ovsFwdCtx;
1412 PCWSTR dropReason = L"";
1413 NDIS_STATUS status;
1414 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
1415 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
1416
1417 /* XXX: ASSERT that the flow table lock is held. */
1418 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
1419 sendFlags, fwdDetail, completionList,
1420 layers, TRUE);
1421 if (status != NDIS_STATUS_SUCCESS) {
1422 dropReason = L"OVS-initing destination port list failed";
1423 goto dropit;
1424 }
1425
1426 if (actionsLen == 0) {
1427 dropReason = L"OVS-Dropped due to Flow action";
1428 ovsActionStats.zeroActionLen++;
1429 goto dropit;
1430 }
1431
1432 NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) {
d838e577 1433 switch(NlAttrType(a)) {
c803536e 1434 case OVS_ACTION_ATTR_OUTPUT:
d838e577 1435 dstPortID = NlAttrGetU32(a);
c803536e
SS
1436 status = OvsAddPorts(&ovsFwdCtx, key, dstPortID,
1437 TRUE, TRUE);
1438 if (status != NDIS_STATUS_SUCCESS) {
1439 dropReason = L"OVS-adding destination port failed";
1440 goto dropit;
1441 }
1442 break;
1443
1444 case OVS_ACTION_ATTR_PUSH_VLAN:
1445 {
1446 struct ovs_action_push_vlan *vlan;
1447 PVOID vlanTagValue;
1448 PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag;
1449
1450 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1451 || ovsFwdCtx.tunnelRxNic != NULL) {
1452 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1453 if (status != NDIS_STATUS_SUCCESS) {
1454 dropReason = L"OVS-adding destination failed";
1455 goto dropit;
1456 }
1457 }
1458
1459 vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1460 Ieee8021QNetBufferListInfo);
1461 if (vlanTagValue != NULL) {
1462 /*
1463 * XXX: We don't support double VLAN tag offload. In such cases,
1464 * we need to insert the existing one into the packet buffer,
1465 * and add the new one as offload. This will take care of
1466 * guest tag-in-tag case as well as OVS rules that specify
1467 * tag-in-tag.
1468 */
1469 } else {
1470 vlanTagValue = 0;
1471 vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
d838e577 1472 vlan = (struct ovs_action_push_vlan *)NlAttrGet((const PNL_ATTR)a);
c803536e
SS
1473 vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff;
1474 vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13;
1475
1476 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1477 Ieee8021QNetBufferListInfo) = vlanTagValue;
1478 }
1479 break;
1480 }
1481
1482 case OVS_ACTION_ATTR_POP_VLAN:
1483 {
1484 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1485 || ovsFwdCtx.tunnelRxNic != NULL) {
1486 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1487 if (status != NDIS_STATUS_SUCCESS) {
1488 dropReason = L"OVS-adding destination failed";
1489 goto dropit;
1490 }
1491 }
1492
1493 if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1494 Ieee8021QNetBufferListInfo) != 0) {
1495 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1496 Ieee8021QNetBufferListInfo) = 0;
1497 } else {
1498 /*
1499 * The VLAN tag is inserted into the packet buffer. Pop the tag
1500 * by packet buffer modification.
1501 */
1502 status = OvsPopVlanInPktBuf(&ovsFwdCtx);
1503 if (status != NDIS_STATUS_SUCCESS) {
1504 dropReason = L"OVS-pop vlan action failed";
1505 goto dropit;
1506 }
1507 }
1508 break;
1509 }
1510
1511 case OVS_ACTION_ATTR_USERSPACE:
1512 {
d838e577
AS
1513 PNL_ATTR userdataAttr;
1514 PNL_ATTR queueAttr;
c803536e 1515 POVS_PACKET_QUEUE_ELEM elem;
429d4556
AS
1516 BOOLEAN isRecv = FALSE;
1517
1518 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(switchContext,
1519 portNo);
1520
1521 if (vport) {
1522 if (vport->isExternal ||
1523 OvsIsTunnelVportType(vport->ovsType)) {
1524 isRecv = TRUE;
1525 }
1526 }
c803536e 1527
d838e577
AS
1528 queueAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_PID);
1529 userdataAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_USERDATA);
c803536e 1530
640ebde7
EE
1531 elem = OvsCreateQueueNlPacket((PVOID)userdataAttr,
1532 userdataAttr->nlaLen,
1533 OVS_PACKET_CMD_ACTION,
1534 portNo, key,ovsFwdCtx.curNbl,
1535 NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx.curNbl),
1536 isRecv,
1537 layers);
c803536e
SS
1538 if (elem) {
1539 LIST_ENTRY missedPackets;
1540 InitializeListHead(&missedPackets);
1541 InsertTailList(&missedPackets, &elem->link);
4a3c9b70 1542 OvsQueuePackets(&missedPackets, 1);
c803536e
SS
1543 dropReason = L"OVS-Completed since packet was copied to "
1544 L"userspace";
1545 } else {
1546 dropReason = L"OVS-Dropped due to failure to queue to "
1547 L"userspace";
1548 goto dropit;
1549 }
1550 break;
1551 }
1552 case OVS_ACTION_ATTR_SET:
1553 {
1554 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1555 || ovsFwdCtx.tunnelRxNic != NULL) {
1556 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1557 if (status != NDIS_STATUS_SUCCESS) {
1558 dropReason = L"OVS-adding destination failed";
1559 goto dropit;
1560 }
1561 }
1562
1563 status = OvsExecuteSetAction(&ovsFwdCtx, key, hash,
d838e577
AS
1564 (const PNL_ATTR)NlAttrGet
1565 ((const PNL_ATTR)a));
c803536e
SS
1566 if (status != NDIS_STATUS_SUCCESS) {
1567 dropReason = L"OVS-set action failed";
1568 goto dropit;
1569 }
1570 break;
1571 }
1572 case OVS_ACTION_ATTR_SAMPLE:
c803536e 1573 default:
7c5d9f17 1574 status = NDIS_STATUS_NOT_SUPPORTED;
c803536e
SS
1575 break;
1576 }
1577 }
1578
1579 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1580 || ovsFwdCtx.tunnelRxNic != NULL) {
1581 status = OvsOutputForwardingCtx(&ovsFwdCtx);
1582 ASSERT(ovsFwdCtx.curNbl == NULL);
1583 }
1584
1585 ASSERT(ovsFwdCtx.destPortsSizeOut == 0);
1586 ASSERT(ovsFwdCtx.tunnelRxNic == NULL);
1587 ASSERT(ovsFwdCtx.tunnelTxNic == NULL);
1588
1589dropit:
1590 /*
1591 * If curNbl != NULL, it implies the NBL has not been not freed up so far.
1592 */
1593 if (ovsFwdCtx.curNbl) {
1594 OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason);
1595 }
1596
1597 return status;
1598}