]> git.proxmox.com Git - mirror_ovs.git/blob - datapath-windows/ovsext/OvsActions.c
netlink-socket: Use read/write ioctl instead of ReadFile/WriteFile.
[mirror_ovs.git] / datapath-windows / ovsext / OvsActions.c
1 /*
2 * Copyright (c) 2014 VMware, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "precomp.h"
18
19 #include "OvsSwitch.h"
20 #include "OvsVport.h"
21 #include "OvsEvent.h"
22 #include "OvsUser.h"
23 #include "OvsNetProto.h"
24 #include "OvsFlow.h"
25 #include "OvsVxlan.h"
26 #include "OvsChecksum.h"
27 #include "OvsPacketIO.h"
28
29 #ifdef OVS_DBG_MOD
30 #undef OVS_DBG_MOD
31 #endif
32 #define OVS_DBG_MOD OVS_DBG_ACTION
33 #include "OvsDebug.h"
34
35 typedef struct _OVS_ACTION_STATS {
36 UINT64 rxVxlan;
37 UINT64 txVxlan;
38 UINT64 flowMiss;
39 UINT64 flowUserspace;
40 UINT64 txTcp;
41 UINT32 failedFlowMiss;
42 UINT32 noVport;
43 UINT32 failedFlowExtract;
44 UINT32 noResource;
45 UINT32 noCopiedNbl;
46 UINT32 failedEncap;
47 UINT32 failedDecap;
48 UINT32 cannotGrowDest;
49 UINT32 zeroActionLen;
50 UINT32 failedChecksum;
51 } OVS_ACTION_STATS, *POVS_ACTION_STATS;
52
53 OVS_ACTION_STATS ovsActionStats;
54
55 /*
56 * There a lot of data that needs to be maintained while executing the pipeline
57 * as dictated by the actions of a flow, across different functions at different
58 * levels. Such data is put together in a 'context' structure. Care should be
59 * exercised while adding new members to the structure - only add ones that get
60 * used across multiple stages in the pipeline/get used in multiple functions.
61 */
62 #define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
63 typedef struct OvsForwardingContext {
64 POVS_SWITCH_CONTEXT switchContext;
65 /* The NBL currently used in the pipeline. */
66 PNET_BUFFER_LIST curNbl;
67 /* NDIS forwarding detail for 'curNbl'. */
68 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
69 /* Array of destination ports for 'curNbl'. */
70 PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
71 /* send flags while sending 'curNbl' into NDIS. */
72 ULONG sendFlags;
73 /* Total number of output ports, used + unused, in 'curNbl'. */
74 UINT32 destPortsSizeIn;
75 /* Total number of used output ports in 'curNbl'. */
76 UINT32 destPortsSizeOut;
77 /*
78 * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
79 * be freed/completed.
80 */
81 OvsCompletionList *completionList;
82 /*
83 * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
84 * bridge. ie. during tunneling on the Rx side.
85 */
86 UINT32 srcVportNo;
87
88 /*
89 * Tunnel key:
90 * - specified in actions during tunneling Tx
91 * - extracted from an NBL during tunneling Rx
92 */
93 OvsIPv4TunnelKey tunKey;
94
95 /*
96 * Tunneling - Tx:
97 * To store the output port, when it is a tunneled port. We don't foresee
98 * multiple tunneled ports as outport for any given NBL.
99 */
100 POVS_VPORT_ENTRY tunnelTxNic;
101
102 /*
103 * Tunneling - Rx:
104 * Points to the Internal port on the PIF Bridge, if the packet needs to be
105 * de-tunneled.
106 */
107 POVS_VPORT_ENTRY tunnelRxNic;
108
109 /* header information */
110 OVS_PACKET_HDR_INFO layers;
111 } OvsForwardingContext;
112
113
114 /*
115 * --------------------------------------------------------------------------
116 * OvsInitForwardingCtx --
117 * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
118 * is being executed.
119 *
120 * Result:
121 * NDIS_STATUS_SUCCESS on success
122 * Other NDIS_STATUS upon failure. Upon failure, it is safe to call
123 * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
124 * enough for OvsCompleteNBLForwardingCtx() to do its work.
125 * --------------------------------------------------------------------------
126 */
127 static __inline NDIS_STATUS
128 OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx,
129 POVS_SWITCH_CONTEXT switchContext,
130 PNET_BUFFER_LIST curNbl,
131 UINT32 srcVportNo,
132 ULONG sendFlags,
133 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail,
134 OvsCompletionList *completionList,
135 OVS_PACKET_HDR_INFO *layers,
136 BOOLEAN resetTunnelInfo)
137 {
138 ASSERT(ovsFwdCtx);
139 ASSERT(switchContext);
140 ASSERT(curNbl);
141 ASSERT(fwdDetail);
142
143 /*
144 * Set values for curNbl and switchContext so upon failures, we have enough
145 * information to do cleanup.
146 */
147 ovsFwdCtx->curNbl = curNbl;
148 ovsFwdCtx->switchContext = switchContext;
149 ovsFwdCtx->completionList = completionList;
150 ovsFwdCtx->fwdDetail = fwdDetail;
151
152 if (fwdDetail->NumAvailableDestinations > 0) {
153 /*
154 * XXX: even though MSDN says GetNetBufferListDestinations() returns
155 * NDIS_STATUS, the header files say otherwise.
156 */
157 switchContext->NdisSwitchHandlers.GetNetBufferListDestinations(
158 switchContext->NdisSwitchContext, curNbl,
159 &ovsFwdCtx->destinationPorts);
160
161 ASSERT(ovsFwdCtx->destinationPorts);
162 /* Ensure that none of the elements are consumed yet. */
163 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
164 fwdDetail->NumAvailableDestinations);
165 } else {
166 ovsFwdCtx->destinationPorts = NULL;
167 }
168 ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations;
169 ovsFwdCtx->destPortsSizeOut = 0;
170 ovsFwdCtx->srcVportNo = srcVportNo;
171 ovsFwdCtx->sendFlags = sendFlags;
172 if (layers) {
173 ovsFwdCtx->layers = *layers;
174 } else {
175 RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers);
176 }
177 if (resetTunnelInfo) {
178 ovsFwdCtx->tunnelTxNic = NULL;
179 ovsFwdCtx->tunnelRxNic = NULL;
180 RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey);
181 }
182
183 return NDIS_STATUS_SUCCESS;
184 }
185
186 /*
187 * --------------------------------------------------------------------------
188 * OvsDetectTunnelRxPkt --
189 * Utility function for an RX packet to detect its tunnel type.
190 *
191 * Result:
192 * True - if the tunnel type was detected.
193 * False - if not a tunnel packet or tunnel type not supported.
194 * --------------------------------------------------------------------------
195 */
196 static __inline BOOLEAN
197 OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
198 const OvsFlowKey *flowKey)
199 {
200 POVS_VPORT_ENTRY tunnelVport = NULL;
201
202 /* XXX: we should also check for the length of the UDP payload to pick
203 * packets only if they are at least VXLAN header size.
204 */
205 if (!flowKey->ipKey.nwFrag &&
206 flowKey->ipKey.nwProto == IPPROTO_UDP &&
207 flowKey->ipKey.l4.tpDst == VXLAN_UDP_PORT_NBO) {
208 tunnelVport = OvsGetTunnelVport(OVSWIN_VPORT_TYPE_VXLAN);
209 ovsActionStats.rxVxlan++;
210 }
211
212 // We might get tunnel packets even before the tunnel gets initialized.
213 if (tunnelVport) {
214 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
215 ovsFwdCtx->tunnelRxNic = tunnelVport;
216 return TRUE;
217 }
218
219 return FALSE;
220 }
221
222 /*
223 * --------------------------------------------------------------------------
224 * OvsDetectTunnelPkt --
225 * Utility function to detect if a packet is to be subjected to
226 * tunneling (Tx) or de-tunneling (Rx). Various factors such as source
227 * port, destination port, packet contents, and previously setup tunnel
228 * context are used.
229 *
230 * Result:
231 * True - If the packet is to be subjected to tunneling.
232 * In case of invalid tunnel context, the tunneling functionality is
233 * a no-op and is completed within this function itself by consuming
234 * all of the tunneling context.
235 * False - If not a tunnel packet or tunnel type not supported. Caller should
236 * process the packet as a non-tunnel packet.
237 * --------------------------------------------------------------------------
238 */
239 static __inline BOOLEAN
240 OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
241 const POVS_VPORT_ENTRY dstVport,
242 const OvsFlowKey *flowKey)
243 {
244 if (OvsIsInternalVportType(dstVport->ovsType)) {
245 /*
246 * Rx:
247 * The source of NBL during tunneling Rx could be the external
248 * port or if it is being executed from userspace, the source port is
249 * default port.
250 */
251 BOOLEAN validSrcPort = (ovsFwdCtx->fwdDetail->SourcePortId ==
252 ovsFwdCtx->switchContext->externalPortId) ||
253 (ovsFwdCtx->fwdDetail->SourcePortId ==
254 NDIS_SWITCH_DEFAULT_PORT_ID);
255
256 if (validSrcPort && OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) {
257 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
258 ASSERT(ovsFwdCtx->tunnelRxNic != NULL);
259 return TRUE;
260 }
261 } else if (OvsIsTunnelVportType(dstVport->ovsType)) {
262 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
263 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
264
265 /*
266 * Tx:
267 * The destination port is a tunnel port. Encapsulation must be
268 * performed only on packets that originate from a VIF port or from
269 * userspace (default port)
270 *
271 * If the packet will not be encapsulated, consume the tunnel context
272 * by clearing it.
273 */
274 if (ovsFwdCtx->srcVportNo != OVS_DEFAULT_PORT_NO &&
275 !OvsIsVifVportNo(ovsFwdCtx->srcVportNo)) {
276 ovsFwdCtx->tunKey.dst = 0;
277 }
278
279 /* Tunnel the packet only if tunnel context is set. */
280 if (ovsFwdCtx->tunKey.dst != 0) {
281 ovsActionStats.txVxlan++;
282 ovsFwdCtx->tunnelTxNic = dstVport;
283 }
284
285 return TRUE;
286 }
287
288 return FALSE;
289 }
290
291
292 /*
293 * --------------------------------------------------------------------------
294 * OvsAddPorts --
295 * Add the specified destination vport into the forwarding context. If the
296 * vport is a VIF/external port, it is added directly to the NBL. If it is
297 * a tunneling port, it is NOT added to the NBL.
298 *
299 * Result:
300 * NDIS_STATUS_SUCCESS on success
301 * Other NDIS_STATUS upon failure.
302 * --------------------------------------------------------------------------
303 */
304 static __inline NDIS_STATUS
305 OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
306 OvsFlowKey *flowKey,
307 NDIS_SWITCH_PORT_ID dstPortId,
308 BOOLEAN preserveVLAN,
309 BOOLEAN preservePriority)
310 {
311 POVS_VPORT_ENTRY vport;
312 PNDIS_SWITCH_PORT_DESTINATION fwdPort;
313 NDIS_STATUS status;
314 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
315
316 /*
317 * We hold the dispatch lock that protects the list of vports, so vports
318 * validated here can be added as destinations safely before we call into
319 * NDIS.
320 *
321 * Some of the vports can be tunnelled ports as well in which case
322 * they should be added to a separate list of tunnelled destination ports
323 * instead of the VIF ports. The context for the tunnel is settable
324 * in OvsForwardingContext.
325 */
326 vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId);
327 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
328 /*
329 * There may be some latency between a port disappearing, and userspace
330 * updating the recalculated flows. In the meantime, handle invalid
331 * ports gracefully.
332 */
333 ovsActionStats.noVport++;
334 return NDIS_STATUS_SUCCESS;
335 }
336 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
337 vport->stats.txPackets++;
338 vport->stats.txBytes +=
339 NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
340
341 if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
342 return NDIS_STATUS_SUCCESS;
343 }
344
345 if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) {
346 if (ovsFwdCtx->destPortsSizeIn == 0) {
347 ASSERT(ovsFwdCtx->destinationPorts == NULL);
348 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
349 status =
350 switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
351 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
352 OVS_DEST_PORTS_ARRAY_MIN_SIZE,
353 &ovsFwdCtx->destinationPorts);
354 if (status != NDIS_STATUS_SUCCESS) {
355 ovsActionStats.cannotGrowDest++;
356 return status;
357 }
358 ovsFwdCtx->destPortsSizeIn =
359 ovsFwdCtx->fwdDetail->NumAvailableDestinations;
360 ASSERT(ovsFwdCtx->destinationPorts);
361 } else {
362 ASSERT(ovsFwdCtx->destinationPorts != NULL);
363 /*
364 * NumElements:
365 * A ULONG value that specifies the total number of
366 * NDIS_SWITCH_PORT_DESTINATION elements in the
367 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
368 *
369 * NumDestinations:
370 * A ULONG value that specifies the number of
371 * NDIS_SWITCH_PORT_DESTINATION elements in the
372 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
373 * specify port destinations.
374 *
375 * NumAvailableDestinations:
376 * A value that specifies the number of unused extensible switch
377 * destination ports elements within an NET_BUFFER_LIST structure.
378 */
379 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
380 ovsFwdCtx->destPortsSizeIn);
381 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
382 ovsFwdCtx->destPortsSizeOut -
383 ovsFwdCtx->fwdDetail->NumAvailableDestinations);
384 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0);
385 /*
386 * Before we grow the array of destination ports, the current set
387 * of ports needs to be committed. Only the ports added since the
388 * last commit need to be part of the new update.
389 */
390 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
391 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
392 ovsFwdCtx->fwdDetail->NumAvailableDestinations,
393 ovsFwdCtx->destinationPorts);
394 if (status != NDIS_STATUS_SUCCESS) {
395 ovsActionStats.cannotGrowDest++;
396 return status;
397 }
398 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
399 ovsFwdCtx->destPortsSizeIn);
400 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
401 ovsFwdCtx->destPortsSizeOut);
402 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
403
404 status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
405 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
406 ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts);
407 if (status != NDIS_STATUS_SUCCESS) {
408 ovsActionStats.cannotGrowDest++;
409 return status;
410 }
411 ASSERT(ovsFwdCtx->destinationPorts != NULL);
412 ovsFwdCtx->destPortsSizeIn <<= 1;
413 }
414 }
415
416 ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
417 fwdPort =
418 NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
419 ovsFwdCtx->destPortsSizeOut);
420
421 fwdPort->PortId = vport->portId;
422 fwdPort->NicIndex = vport->nicIndex;
423 fwdPort->IsExcluded = 0;
424 fwdPort->PreserveVLAN = preserveVLAN;
425 fwdPort->PreservePriority = preservePriority;
426 ovsFwdCtx->destPortsSizeOut += 1;
427
428 return NDIS_STATUS_SUCCESS;
429 }
430
431
432 /*
433 * --------------------------------------------------------------------------
434 * OvsClearTunTxCtx --
435 * Utility function to clear tx tunneling context.
436 * --------------------------------------------------------------------------
437 */
438 static __inline VOID
439 OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx)
440 {
441 ovsFwdCtx->tunnelTxNic = NULL;
442 ovsFwdCtx->tunKey.dst = 0;
443 }
444
445
446 /*
447 * --------------------------------------------------------------------------
448 * OvsClearTunRxCtx --
449 * Utility function to clear rx tunneling context.
450 * --------------------------------------------------------------------------
451 */
452 static __inline VOID
453 OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx)
454 {
455 ovsFwdCtx->tunnelRxNic = NULL;
456 ovsFwdCtx->tunKey.dst = 0;
457 }
458
459
460 /*
461 * --------------------------------------------------------------------------
462 * OvsCompleteNBLForwardingCtx --
463 * This utility function is responsible for freeing/completing an NBL - either
464 * by adding it to a completion list or by freeing it.
465 *
466 * Side effects:
467 * It also resets the necessary fields in 'ovsFwdCtx'.
468 * --------------------------------------------------------------------------
469 */
470 static __inline VOID
471 OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx,
472 PCWSTR dropReason)
473 {
474 NDIS_STRING filterReason;
475
476 RtlInitUnicodeString(&filterReason, dropReason);
477 if (ovsFwdCtx->completionList) {
478 OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE,
479 ovsFwdCtx->fwdDetail->SourcePortId, ovsFwdCtx->curNbl, 1,
480 &filterReason);
481 ovsFwdCtx->curNbl = NULL;
482 } else {
483 /* If there is no completionList, we assume this is ovs created NBL */
484 ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext,
485 ovsFwdCtx->curNbl, TRUE);
486 ASSERT(ovsFwdCtx->curNbl == NULL);
487 }
488 /* XXX: these can be made debug only to save cycles. Ideally the pipeline
489 * using these fields should reset the values at the end of the pipeline. */
490 ovsFwdCtx->destPortsSizeOut = 0;
491 ovsFwdCtx->tunnelTxNic = NULL;
492 ovsFwdCtx->tunnelRxNic = NULL;
493 }
494
495 /*
496 * --------------------------------------------------------------------------
497 * OvsDoFlowLookupOutput --
498 * Function to be used for the second stage of a tunneling workflow, ie.:
499 * - On the encapsulated packet on Tx path, to do a flow extract, flow
500 * lookup and excuting the actions.
501 * - On the decapsulated packet on Rx path, to do a flow extract, flow
502 * lookup and excuting the actions.
503 *
504 * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
505 * until the new buffer management framework is adopted.
506 *
507 * Side effects:
508 * The NBL in 'ovsFwdCtx' is consumed.
509 * --------------------------------------------------------------------------
510 */
511 static __inline NDIS_STATUS
512 OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
513 {
514 OvsFlowKey key;
515 OvsFlow *flow;
516 UINT64 hash;
517 NDIS_STATUS status;
518 POVS_VPORT_ENTRY vport =
519 OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
520 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
521 ASSERT(FALSE); // XXX: let's catch this for now
522 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
523 L"OVS-Dropped due to internal/tunnel port removal");
524 ovsActionStats.noVport++;
525 return NDIS_STATUS_SUCCESS;
526 }
527 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
528
529 /* Assert that in the Rx direction, key is always setup. */
530 ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
531 status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
532 &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ?
533 &ovsFwdCtx->tunKey : NULL);
534 if (status != NDIS_STATUS_SUCCESS) {
535 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
536 L"OVS-Flow extract failed");
537 ovsActionStats.failedFlowExtract++;
538 return status;
539 }
540
541 flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
542 if (flow) {
543 OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
544 ovsFwdCtx->switchContext->datapath.hits++;
545 status = OvsActionsExecute(ovsFwdCtx->switchContext,
546 ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
547 ovsFwdCtx->srcVportNo, ovsFwdCtx->sendFlags,
548 &key, &hash, &ovsFwdCtx->layers,
549 flow->actions, flow->actionsLen);
550 ovsFwdCtx->curNbl = NULL;
551 } else {
552 LIST_ENTRY missedPackets;
553 UINT32 num = 0;
554 ovsFwdCtx->switchContext->datapath.misses++;
555 InitializeListHead(&missedPackets);
556 status = OvsCreateAndAddPackets(
557 OVS_DEFAULT_PACKET_QUEUE, NULL, 0, OVS_PACKET_CMD_MISS,
558 ovsFwdCtx->srcVportNo,
559 key.tunKey.dst != 0 ?
560 (OvsIPv4TunnelKey *)&key.tunKey : NULL,
561 ovsFwdCtx->curNbl,
562 ovsFwdCtx->tunnelRxNic != NULL, &ovsFwdCtx->layers,
563 ovsFwdCtx->switchContext, &missedPackets, &num);
564 if (num) {
565 OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num);
566 }
567 if (status == NDIS_STATUS_SUCCESS) {
568 /* Complete the packet since it was copied to user buffer. */
569 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
570 L"OVS-Dropped since packet was copied to userspace");
571 ovsActionStats.flowMiss++;
572 status = NDIS_STATUS_SUCCESS;
573 } else {
574 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
575 L"OVS-Dropped due to failure to queue to userspace");
576 status = NDIS_STATUS_FAILURE;
577 ovsActionStats.failedFlowMiss++;
578 }
579 }
580
581 return status;
582 }
583
584 /*
585 * --------------------------------------------------------------------------
586 * OvsTunnelPortTx --
587 * The start function for Tx tunneling - encapsulates the packet, and
588 * outputs the packet on the PIF bridge.
589 *
590 * Side effects:
591 * The NBL in 'ovsFwdCtx' is consumed.
592 * --------------------------------------------------------------------------
593 */
594 static __inline NDIS_STATUS
595 OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
596 {
597 NDIS_STATUS status = NDIS_STATUS_FAILURE;
598 PNET_BUFFER_LIST newNbl = NULL;
599
600 /*
601 * Setup the source port to be the internal port to as to facilitate the
602 * second OvsLookupFlow.
603 */
604 if (ovsFwdCtx->switchContext->internalVport == NULL) {
605 OvsClearTunTxCtx(ovsFwdCtx);
606 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
607 L"OVS-Dropped since internal port is absent");
608 return NDIS_STATUS_FAILURE;
609 }
610 ovsFwdCtx->srcVportNo =
611 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo;
612
613 ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId;
614 ovsFwdCtx->fwdDetail->SourceNicIndex =
615 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex;
616
617 /* Do the encap. Encap function does not consume the NBL. */
618 switch(ovsFwdCtx->tunnelTxNic->ovsType) {
619 case OVSWIN_VPORT_TYPE_VXLAN:
620 status = OvsEncapVxlan(ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey,
621 ovsFwdCtx->switchContext,
622 (VOID *)ovsFwdCtx->completionList,
623 &ovsFwdCtx->layers, &newNbl);
624 break;
625 default:
626 ASSERT(! "Tx: Unhandled tunnel type");
627 }
628
629 /* Reset the tunnel context so that it doesn't get used after this point. */
630 OvsClearTunTxCtx(ovsFwdCtx);
631
632 if (status == NDIS_STATUS_SUCCESS) {
633 ASSERT(newNbl);
634 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
635 L"Complete after cloning NBL for encapsulation");
636 ovsFwdCtx->curNbl = newNbl;
637 status = OvsDoFlowLookupOutput(ovsFwdCtx);
638 ASSERT(ovsFwdCtx->curNbl == NULL);
639 } else {
640 /*
641 * XXX: Temporary freeing of the packet until we register a
642 * callback to IP helper.
643 */
644 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
645 L"OVS-Dropped due to encap failure");
646 ovsActionStats.failedEncap++;
647 status = NDIS_STATUS_SUCCESS;
648 }
649
650 return status;
651 }
652
653 /*
654 * --------------------------------------------------------------------------
655 * OvsTunnelPortRx --
656 * Decapsulate the incoming NBL based on the tunnel type and goes through
657 * the flow lookup for the inner packet.
658 *
659 * Note: IP checksum is validate here, but L4 checksum validation needs
660 * to be done by the corresponding tunnel types.
661 *
662 * Side effects:
663 * The NBL in 'ovsFwdCtx' is consumed.
664 * --------------------------------------------------------------------------
665 */
666 static __inline NDIS_STATUS
667 OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
668 {
669 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
670 PNET_BUFFER_LIST newNbl = NULL;
671 POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
672
673 if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
674 != NDIS_STATUS_SUCCESS) {
675 ovsActionStats.failedChecksum++;
676 OVS_LOG_INFO("Packet dropped due to IP checksum failure.");
677 goto dropNbl;
678 }
679
680 switch(tunnelRxVport->ovsType) {
681 case OVSWIN_VPORT_TYPE_VXLAN:
682 /*
683 * OvsDoDecapVxlan should return a new NBL if it was copied, and
684 * this new NBL should be setup as the ovsFwdCtx->curNbl.
685 */
686 status = OvsDoDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
687 &ovsFwdCtx->tunKey, &newNbl);
688 break;
689 default:
690 OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
691 tunnelRxVport->ovsType);
692 ASSERT(! "Rx: Unhandled tunnel type");
693 status = NDIS_STATUS_NOT_SUPPORTED;
694 }
695
696 if (status != NDIS_STATUS_SUCCESS) {
697 ovsActionStats.failedDecap++;
698 goto dropNbl;
699 }
700
701 /*
702 * tunnelRxNic and other fields will be cleared, re-init the context
703 * before usage.
704 */
705 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
706 L"OVS-dropped due to new decap packet");
707
708 /* Decapsulated packet is in a new NBL */
709 ovsFwdCtx->tunnelRxNic = tunnelRxVport;
710 OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
711 newNbl, tunnelRxVport->portNo, 0,
712 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
713 ovsFwdCtx->completionList,
714 &ovsFwdCtx->layers, FALSE);
715
716 /*
717 * Set the NBL's SourcePortId and SourceNicIndex to default values to
718 * keep NDIS happy when we forward the packet.
719 */
720 ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
721 ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
722
723 status = OvsDoFlowLookupOutput(ovsFwdCtx);
724 ASSERT(ovsFwdCtx->curNbl == NULL);
725 OvsClearTunRxCtx(ovsFwdCtx);
726
727 return status;
728
729 dropNbl:
730 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
731 L"OVS-dropped due to decap failure");
732 OvsClearTunRxCtx(ovsFwdCtx);
733 return status;
734 }
735
736
737 /*
738 * --------------------------------------------------------------------------
739 * OvsOutputForwardingCtx --
740 * This function outputs an NBL to NDIS or to a tunneling pipeline based on
741 * the ports added so far into 'ovsFwdCtx'.
742 *
743 * Side effects:
744 * This function consumes the NBL - either by forwarding it successfully to
745 * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it.
746 *
747 * Also makes sure that the list of destination ports - tunnel or otherwise is
748 * drained.
749 * --------------------------------------------------------------------------
750 */
751 static __inline NDIS_STATUS
752 OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx)
753 {
754 NDIS_STATUS status = STATUS_SUCCESS;
755 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
756
757 /*
758 * Handle the case where the some of the destination ports are tunneled
759 * ports - the non-tunneled ports get a unmodified copy of the NBL, and the
760 * tunneling pipeline starts when we output the packet to tunneled port.
761 */
762 if (ovsFwdCtx->destPortsSizeOut > 0) {
763 PNET_BUFFER_LIST newNbl = NULL;
764 PNET_BUFFER nb;
765 UINT32 portsToUpdate =
766 ovsFwdCtx->fwdDetail->NumAvailableDestinations -
767 (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut);
768
769 ASSERT(ovsFwdCtx->destinationPorts != NULL);
770
771 /*
772 * Create a copy of the packet in order to do encap on it later. Also,
773 * don't copy the offload context since the encap'd packet has a
774 * different set of headers. This will change when we implement offloads
775 * before doing encapsulation.
776 */
777 if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) {
778 nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
779 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
780 0, 0, TRUE /*copy NBL info*/);
781 if (newNbl == NULL) {
782 status = NDIS_STATUS_RESOURCES;
783 ovsActionStats.noCopiedNbl++;
784 goto dropit;
785 }
786 }
787
788 /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */
789 ASSERT(portsToUpdate > 0);
790 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
791 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
792 portsToUpdate, ovsFwdCtx->destinationPorts);
793 if (status != NDIS_STATUS_SUCCESS) {
794 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
795 ovsActionStats.cannotGrowDest++;
796 goto dropit;
797 }
798
799 OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
800 ovsFwdCtx->sendFlags);
801 /* End this pipeline by resetting the corresponding context. */
802 ovsFwdCtx->destPortsSizeOut = 0;
803 ovsFwdCtx->curNbl = NULL;
804 if (newNbl) {
805 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
806 newNbl, ovsFwdCtx->srcVportNo, 0,
807 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
808 ovsFwdCtx->completionList,
809 &ovsFwdCtx->layers, FALSE);
810 if (status != NDIS_STATUS_SUCCESS) {
811 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
812 L"Dropped due to resouces");
813 goto dropit;
814 }
815 }
816 }
817
818 if (ovsFwdCtx->tunnelTxNic != NULL) {
819 status = OvsTunnelPortTx(ovsFwdCtx);
820 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
821 ASSERT(ovsFwdCtx->tunKey.dst == 0);
822 } else if (ovsFwdCtx->tunnelRxNic != NULL) {
823 status = OvsTunnelPortRx(ovsFwdCtx);
824 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
825 ASSERT(ovsFwdCtx->tunKey.dst == 0);
826 }
827 ASSERT(ovsFwdCtx->curNbl == NULL);
828
829 return status;
830
831 dropit:
832 if (status != NDIS_STATUS_SUCCESS) {
833 OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"Dropped due to XXX");
834 }
835
836 return status;
837 }
838
839
840 /*
841 * --------------------------------------------------------------------------
842 * OvsLookupFlowOutput --
843 * Utility function for external callers to do flow extract, lookup,
844 * actions execute on a given NBL.
845 *
846 * Note: If this is being used from a callback function, make sure that the
847 * arguments specified are still valid in the asynchronous context.
848 *
849 * Side effects:
850 * This function consumes the NBL.
851 * --------------------------------------------------------------------------
852 */
853 VOID
854 OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext,
855 VOID *compList,
856 PNET_BUFFER_LIST curNbl)
857 {
858 NDIS_STATUS status;
859 OvsForwardingContext ovsFwdCtx;
860 POVS_VPORT_ENTRY internalVport =
861 (POVS_VPORT_ENTRY)switchContext->internalVport;
862
863 /* XXX: make sure comp list was not a stack variable previously. */
864 OvsCompletionList *completionList = (OvsCompletionList *)compList;
865
866 /*
867 * XXX: can internal port disappear while we are busy doing ARP resolution?
868 * It could, but will we get this callback from IP helper in that case. Need
869 * to check.
870 */
871 ASSERT(switchContext->internalVport);
872 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl,
873 internalVport->portNo, 0,
874 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl),
875 completionList, NULL, TRUE);
876 if (status != NDIS_STATUS_SUCCESS) {
877 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
878 L"OVS-Dropped due to resources");
879 return;
880 }
881
882 ASSERT(FALSE);
883 /*
884 * XXX: We need to acquire the dispatch lock and the datapath lock.
885 */
886
887 OvsDoFlowLookupOutput(&ovsFwdCtx);
888 }
889
890
891 /*
892 * --------------------------------------------------------------------------
893 * OvsOutputBeforeSetAction --
894 * Function to be called to complete one set of actions on an NBL, before
895 * we start the next one.
896 * --------------------------------------------------------------------------
897 */
898 static __inline NDIS_STATUS
899 OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx)
900 {
901 PNET_BUFFER_LIST newNbl;
902 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
903 PNET_BUFFER nb;
904
905 /*
906 * Create a copy and work on the copy after this point. The original NBL is
907 * forwarded. One reason to not use the copy for forwarding is that
908 * ports have already been added to the original NBL, and it might be
909 * inefficient/impossible to remove/re-add them to the copy. There's no
910 * notion of removing the ports, the ports need to be marked as
911 * "isExcluded". There's seems no real advantage to retaining the original
912 * and sending out the copy instead.
913 *
914 * XXX: We are copying the offload context here. This is to handle actions
915 * such as:
916 * outport, pop_vlan(), outport, push_vlan(), outport
917 *
918 * copy size needs to include inner ether + IP + TCP, need to revisit
919 * if we support IP options.
920 * XXX Head room needs to include the additional encap.
921 * XXX copySize check is not considering multiple NBs.
922 */
923 nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
924 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
925 0, 0, TRUE /*copy NBL info*/);
926
927 ASSERT(ovsFwdCtx->destPortsSizeOut > 0 ||
928 ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL);
929
930 /* Send the original packet out */
931 status = OvsOutputForwardingCtx(ovsFwdCtx);
932 ASSERT(ovsFwdCtx->curNbl == NULL);
933 ASSERT(ovsFwdCtx->destPortsSizeOut == 0);
934 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
935 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
936
937 /* If we didn't make a copy, can't continue. */
938 if (newNbl == NULL) {
939 ovsActionStats.noCopiedNbl++;
940 return NDIS_STATUS_RESOURCES;
941 }
942
943 /* Finish the remaining actions with the new NBL */
944 if (status != NDIS_STATUS_SUCCESS) {
945 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
946 } else {
947 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
948 newNbl, ovsFwdCtx->srcVportNo, 0,
949 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
950 ovsFwdCtx->completionList,
951 &ovsFwdCtx->layers, FALSE);
952 }
953
954 return status;
955 }
956
957
958 /*
959 * --------------------------------------------------------------------------
960 * OvsPopVlanInPktBuf --
961 * Function to pop a VLAN tag when the tag is in the packet buffer.
962 * --------------------------------------------------------------------------
963 */
964 static __inline NDIS_STATUS
965 OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
966 {
967 PNET_BUFFER curNb;
968 PMDL curMdl;
969 PUINT8 bufferStart;
970 ULONG dataLength = sizeof (DL_EUI48) + sizeof (DL_EUI48);
971 UINT32 packetLen, mdlLen;
972 PNET_BUFFER_LIST newNbl;
973 NDIS_STATUS status;
974
975 /*
976 * Declare a dummy vlanTag structure since we need to compute the size
977 * of shiftLength. The NDIS one is a unionized structure.
978 */
979 NDIS_PACKET_8021Q_INFO vlanTag = {0};
980 ULONG shiftLength = sizeof (vlanTag.TagHeader);
981 PUINT8 tempBuffer[sizeof (DL_EUI48) + sizeof (DL_EUI48)];
982
983 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
984 0, 0, TRUE /* copy NBL info */);
985 if (!newNbl) {
986 ovsActionStats.noCopiedNbl++;
987 return NDIS_STATUS_RESOURCES;
988 }
989
990 /* Complete the original NBL and create a copy to modify. */
991 OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy");
992
993 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
994 newNbl, ovsFwdCtx->srcVportNo, 0,
995 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
996 NULL, &ovsFwdCtx->layers, FALSE);
997 if (status != NDIS_STATUS_SUCCESS) {
998 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
999 L"Dropped due to resouces");
1000 return NDIS_STATUS_RESOURCES;
1001 }
1002
1003 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1004 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1005 ASSERT(curNb->Next == NULL);
1006 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1007 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1008 if (!bufferStart) {
1009 return NDIS_STATUS_RESOURCES;
1010 }
1011 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1012 /* Bail out if L2 + VLAN header is not contiguous in the first buffer. */
1013 if (MIN(packetLen, mdlLen) < sizeof (EthHdr) + shiftLength) {
1014 ASSERT(FALSE);
1015 return NDIS_STATUS_FAILURE;
1016 }
1017 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1018 RtlCopyMemory(tempBuffer, bufferStart, dataLength);
1019 RtlCopyMemory(bufferStart + shiftLength, tempBuffer, dataLength);
1020 NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL);
1021
1022 return NDIS_STATUS_SUCCESS;
1023 }
1024
1025 /*
1026 * --------------------------------------------------------------------------
1027 * OvsTunnelAttrToIPv4TunnelKey --
1028 * Convert tunnel attribute to OvsIPv4TunnelKey.
1029 * --------------------------------------------------------------------------
1030 */
1031 static __inline NDIS_STATUS
1032 OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr,
1033 OvsIPv4TunnelKey *tunKey)
1034 {
1035 PNL_ATTR a;
1036 INT rem;
1037
1038 tunKey->attr[0] = 0;
1039 tunKey->attr[1] = 0;
1040 tunKey->attr[2] = 0;
1041 ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL);
1042
1043 NL_ATTR_FOR_EACH_UNSAFE (a, rem, NlAttrData(attr),
1044 NlAttrGetSize(attr)) {
1045 switch (NlAttrType(a)) {
1046 case OVS_TUNNEL_KEY_ATTR_ID:
1047 tunKey->tunnelId = NlAttrGetBe64(a);
1048 tunKey->flags |= OVS_TNL_F_KEY;
1049 break;
1050 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
1051 tunKey->src = NlAttrGetBe32(a);
1052 break;
1053 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
1054 tunKey->dst = NlAttrGetBe32(a);
1055 break;
1056 case OVS_TUNNEL_KEY_ATTR_TOS:
1057 tunKey->tos = NlAttrGetU8(a);
1058 break;
1059 case OVS_TUNNEL_KEY_ATTR_TTL:
1060 tunKey->ttl = NlAttrGetU8(a);
1061 break;
1062 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
1063 tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT;
1064 break;
1065 case OVS_TUNNEL_KEY_ATTR_CSUM:
1066 tunKey->flags |= OVS_TNL_F_CSUM;
1067 break;
1068 default:
1069 ASSERT(0);
1070 }
1071 }
1072
1073 return NDIS_STATUS_SUCCESS;
1074 }
1075
1076 /*
1077 *----------------------------------------------------------------------------
1078 * OvsUpdateEthHeader --
1079 * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the
1080 * specified key.
1081 *----------------------------------------------------------------------------
1082 */
1083 static __inline NDIS_STATUS
1084 OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx,
1085 const struct ovs_key_ethernet *ethAttr)
1086 {
1087 PNET_BUFFER curNb;
1088 PMDL curMdl;
1089 PUINT8 bufferStart;
1090 EthHdr *ethHdr;
1091 UINT32 packetLen, mdlLen;
1092
1093 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1094 ASSERT(curNb->Next == NULL);
1095 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1096 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1097 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1098 if (!bufferStart) {
1099 ovsActionStats.noResource++;
1100 return NDIS_STATUS_RESOURCES;
1101 }
1102 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1103 ASSERT(mdlLen > 0);
1104 /* Bail out if the L2 header is not in a contiguous buffer. */
1105 if (MIN(packetLen, mdlLen) < sizeof *ethHdr) {
1106 ASSERT(FALSE);
1107 return NDIS_STATUS_FAILURE;
1108 }
1109 ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb));
1110
1111 RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst,
1112 sizeof ethHdr->Destination);
1113 RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source);
1114
1115 return NDIS_STATUS_SUCCESS;
1116 }
1117
1118 /*
1119 *----------------------------------------------------------------------------
1120 * OvsUpdateIPv4Header --
1121 * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
1122 * specified key.
1123 *----------------------------------------------------------------------------
1124 */
1125 static __inline NDIS_STATUS
1126 OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
1127 const struct ovs_key_ipv4 *ipAttr)
1128 {
1129 PNET_BUFFER curNb;
1130 PMDL curMdl;
1131 ULONG curMdlOffset;
1132 PUINT8 bufferStart;
1133 UINT32 mdlLen, hdrSize, packetLen;
1134 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1135 NDIS_STATUS status;
1136 IPHdr *ipHdr;
1137 TCPHdr *tcpHdr = NULL;
1138 UDPHdr *udpHdr = NULL;
1139
1140 ASSERT(layers->value != 0);
1141
1142 /*
1143 * Peek into the MDL to get a handle to the IP header and if required
1144 * the TCP/UDP header as well. We check if the required headers are in one
1145 * contiguous MDL, and if not, we copy them over to one MDL.
1146 */
1147 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1148 ASSERT(curNb->Next == NULL);
1149 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1150 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1151 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1152 if (!bufferStart) {
1153 ovsActionStats.noResource++;
1154 return NDIS_STATUS_RESOURCES;
1155 }
1156 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1157 mdlLen -= curMdlOffset;
1158 ASSERT((INT)mdlLen >= 0);
1159
1160 if (layers->isTcp || layers->isUdp) {
1161 hdrSize = layers->l4Offset +
1162 layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
1163 } else {
1164 hdrSize = layers->l3Offset + sizeof (*ipHdr);
1165 }
1166
1167 /* Count of number of bytes of valid data there are in the first MDL. */
1168 mdlLen = MIN(packetLen, mdlLen);
1169 if (mdlLen < hdrSize) {
1170 PNET_BUFFER_LIST newNbl;
1171 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1172 hdrSize, 0, TRUE /*copy NBL info*/);
1173 if (!newNbl) {
1174 ovsActionStats.noCopiedNbl++;
1175 return NDIS_STATUS_RESOURCES;
1176 }
1177 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1178 L"Complete after partial copy.");
1179
1180 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1181 newNbl, ovsFwdCtx->srcVportNo, 0,
1182 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1183 NULL, &ovsFwdCtx->layers, FALSE);
1184 if (status != NDIS_STATUS_SUCCESS) {
1185 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1186 L"OVS-Dropped due to resources");
1187 return NDIS_STATUS_RESOURCES;
1188 }
1189
1190 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1191 ASSERT(curNb->Next == NULL);
1192 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1193 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1194 if (!curMdl) {
1195 ovsActionStats.noResource++;
1196 return NDIS_STATUS_RESOURCES;
1197 }
1198 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1199 mdlLen -= curMdlOffset;
1200 ASSERT(mdlLen >= hdrSize);
1201 }
1202
1203 ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset);
1204
1205 if (layers->isTcp) {
1206 tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1207 } else if (layers->isUdp) {
1208 udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1209 }
1210
1211 /*
1212 * Adjust the IP header inline as dictated by the action, nad also update
1213 * the IP and the TCP checksum for the data modified.
1214 *
1215 * In the future, this could be optimized to make one call to
1216 * ChecksumUpdate32(). Ignoring this for now, since for the most common
1217 * case, we only update the TTL.
1218 */
1219 if (ipHdr->saddr != ipAttr->ipv4_src) {
1220 if (tcpHdr) {
1221 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr,
1222 ipAttr->ipv4_src);
1223 } else if (udpHdr && udpHdr->check) {
1224 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr,
1225 ipAttr->ipv4_src);
1226 }
1227
1228 if (ipHdr->check != 0) {
1229 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr,
1230 ipAttr->ipv4_src);
1231 }
1232 ipHdr->saddr = ipAttr->ipv4_src;
1233 }
1234 if (ipHdr->daddr != ipAttr->ipv4_dst) {
1235 if (tcpHdr) {
1236 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr,
1237 ipAttr->ipv4_dst);
1238 } else if (udpHdr && udpHdr->check) {
1239 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr,
1240 ipAttr->ipv4_dst);
1241 }
1242
1243 if (ipHdr->check != 0) {
1244 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr,
1245 ipAttr->ipv4_dst);
1246 }
1247 ipHdr->daddr = ipAttr->ipv4_dst;
1248 }
1249 if (ipHdr->protocol != ipAttr->ipv4_proto) {
1250 UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00;
1251 UINT16 newProto = (ipAttr->ipv4_proto << 16) & 0xff00;
1252 if (tcpHdr) {
1253 tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto);
1254 } else if (udpHdr && udpHdr->check) {
1255 udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto);
1256 }
1257
1258 if (ipHdr->check != 0) {
1259 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto);
1260 }
1261 ipHdr->protocol = ipAttr->ipv4_proto;
1262 }
1263 if (ipHdr->ttl != ipAttr->ipv4_ttl) {
1264 UINT16 oldTtl = (ipHdr->ttl) & 0xff;
1265 UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff;
1266 if (ipHdr->check != 0) {
1267 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl);
1268 }
1269 ipHdr->ttl = ipAttr->ipv4_ttl;
1270 }
1271
1272 return NDIS_STATUS_SUCCESS;
1273 }
1274
1275 /*
1276 * --------------------------------------------------------------------------
1277 * OvsExecuteSetAction --
1278 * Executes a set() action, but storing the actions into 'ovsFwdCtx'
1279 * --------------------------------------------------------------------------
1280 */
1281 static __inline NDIS_STATUS
1282 OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx,
1283 OvsFlowKey *key,
1284 UINT64 *hash,
1285 const PNL_ATTR a)
1286 {
1287 enum ovs_key_attr type = NlAttrType(a);
1288 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1289
1290 switch (type) {
1291 case OVS_KEY_ATTR_ETHERNET:
1292 status = OvsUpdateEthHeader(ovsFwdCtx,
1293 NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet)));
1294 break;
1295
1296 case OVS_KEY_ATTR_IPV4:
1297 status = OvsUpdateIPv4Header(ovsFwdCtx,
1298 NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4)));
1299 break;
1300
1301 case OVS_KEY_ATTR_TUNNEL:
1302 {
1303 OvsIPv4TunnelKey tunKey;
1304
1305 status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey);
1306 ASSERT(status == NDIS_STATUS_SUCCESS);
1307 tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key));
1308 RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey);
1309
1310 break;
1311 }
1312 case OVS_KEY_ATTR_SKB_MARK:
1313 /* XXX: Not relevant to Hyper-V. Return OK */
1314 break;
1315 case OVS_KEY_ATTR_UNSPEC:
1316 case OVS_KEY_ATTR_ENCAP:
1317 case OVS_KEY_ATTR_ETHERTYPE:
1318 case OVS_KEY_ATTR_IN_PORT:
1319 case OVS_KEY_ATTR_VLAN:
1320 case OVS_KEY_ATTR_ICMP:
1321 case OVS_KEY_ATTR_ICMPV6:
1322 case OVS_KEY_ATTR_ARP:
1323 case OVS_KEY_ATTR_ND:
1324 case __OVS_KEY_ATTR_MAX:
1325 default:
1326 OVS_LOG_INFO("Unhandled attribute %#x", type);
1327 ASSERT(FALSE);
1328 }
1329 return status;
1330 }
1331
1332 /*
1333 * --------------------------------------------------------------------------
1334 * OvsActionsExecute --
1335 * Interpret and execute the specified 'actions' on the specifed packet
1336 * 'curNbl'. The expectation is that if the packet needs to be dropped
1337 * (completed) for some reason, it is added to 'completionList' so that the
1338 * caller can complete the packet. If 'completionList' is NULL, the NBL is
1339 * assumed to be generated by OVS and freed up. Otherwise, the function
1340 * consumes the NBL by generating a NDIS send indication for the packet.
1341 *
1342 * There are one or more of "clone" NBLs that may get generated while
1343 * executing the actions. Upon any failures, the "cloned" NBLs are freed up,
1344 * and the caller does not have to worry about them.
1345 *
1346 * Success or failure is returned based on whether the specified actions
1347 * were executed successfully on the packet or not.
1348 * --------------------------------------------------------------------------
1349 */
1350 NDIS_STATUS
1351 OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
1352 OvsCompletionList *completionList,
1353 PNET_BUFFER_LIST curNbl,
1354 UINT32 portNo,
1355 ULONG sendFlags,
1356 OvsFlowKey *key,
1357 UINT64 *hash,
1358 OVS_PACKET_HDR_INFO *layers,
1359 const PNL_ATTR actions,
1360 INT actionsLen)
1361 {
1362 PNL_ATTR a;
1363 INT rem;
1364 UINT32 dstPortID;
1365 OvsForwardingContext ovsFwdCtx;
1366 PCWSTR dropReason = L"";
1367 NDIS_STATUS status;
1368 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
1369 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
1370
1371 /* XXX: ASSERT that the flow table lock is held. */
1372 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
1373 sendFlags, fwdDetail, completionList,
1374 layers, TRUE);
1375 if (status != NDIS_STATUS_SUCCESS) {
1376 dropReason = L"OVS-initing destination port list failed";
1377 goto dropit;
1378 }
1379
1380 if (actionsLen == 0) {
1381 dropReason = L"OVS-Dropped due to Flow action";
1382 ovsActionStats.zeroActionLen++;
1383 goto dropit;
1384 }
1385
1386 NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) {
1387 switch(NlAttrType(a)) {
1388 case OVS_ACTION_ATTR_OUTPUT:
1389 dstPortID = NlAttrGetU32(a);
1390 status = OvsAddPorts(&ovsFwdCtx, key, dstPortID,
1391 TRUE, TRUE);
1392 if (status != NDIS_STATUS_SUCCESS) {
1393 dropReason = L"OVS-adding destination port failed";
1394 goto dropit;
1395 }
1396 break;
1397
1398 case OVS_ACTION_ATTR_PUSH_VLAN:
1399 {
1400 struct ovs_action_push_vlan *vlan;
1401 PVOID vlanTagValue;
1402 PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag;
1403
1404 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1405 || ovsFwdCtx.tunnelRxNic != NULL) {
1406 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1407 if (status != NDIS_STATUS_SUCCESS) {
1408 dropReason = L"OVS-adding destination failed";
1409 goto dropit;
1410 }
1411 }
1412
1413 vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1414 Ieee8021QNetBufferListInfo);
1415 if (vlanTagValue != NULL) {
1416 /*
1417 * XXX: We don't support double VLAN tag offload. In such cases,
1418 * we need to insert the existing one into the packet buffer,
1419 * and add the new one as offload. This will take care of
1420 * guest tag-in-tag case as well as OVS rules that specify
1421 * tag-in-tag.
1422 */
1423 } else {
1424 vlanTagValue = 0;
1425 vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
1426 vlan = (struct ovs_action_push_vlan *)NlAttrGet((const PNL_ATTR)a);
1427 vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff;
1428 vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13;
1429
1430 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1431 Ieee8021QNetBufferListInfo) = vlanTagValue;
1432 }
1433 break;
1434 }
1435
1436 case OVS_ACTION_ATTR_POP_VLAN:
1437 {
1438 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1439 || ovsFwdCtx.tunnelRxNic != NULL) {
1440 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1441 if (status != NDIS_STATUS_SUCCESS) {
1442 dropReason = L"OVS-adding destination failed";
1443 goto dropit;
1444 }
1445 }
1446
1447 if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1448 Ieee8021QNetBufferListInfo) != 0) {
1449 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1450 Ieee8021QNetBufferListInfo) = 0;
1451 } else {
1452 /*
1453 * The VLAN tag is inserted into the packet buffer. Pop the tag
1454 * by packet buffer modification.
1455 */
1456 status = OvsPopVlanInPktBuf(&ovsFwdCtx);
1457 if (status != NDIS_STATUS_SUCCESS) {
1458 dropReason = L"OVS-pop vlan action failed";
1459 goto dropit;
1460 }
1461 }
1462 break;
1463 }
1464
1465 case OVS_ACTION_ATTR_USERSPACE:
1466 {
1467 PNL_ATTR userdataAttr;
1468 PNL_ATTR queueAttr;
1469 POVS_PACKET_QUEUE_ELEM elem;
1470 UINT32 queueId = OVS_DEFAULT_PACKET_QUEUE;
1471 //XXX confusing that portNo is actually portId for external port.
1472 BOOLEAN isRecv = (portNo == switchContext->externalPortId)
1473 || OvsIsTunnelVportNo(portNo);
1474
1475 queueAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_PID);
1476 userdataAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_USERDATA);
1477
1478 elem = OvsCreateQueuePacket(queueId, (PVOID)userdataAttr,
1479 userdataAttr->nlaLen,
1480 OVS_PACKET_CMD_ACTION,
1481 portNo, (OvsIPv4TunnelKey *)&key->tunKey,
1482 ovsFwdCtx.curNbl,
1483 NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx.curNbl),
1484 isRecv,
1485 layers);
1486 if (elem) {
1487 LIST_ENTRY missedPackets;
1488 InitializeListHead(&missedPackets);
1489 InsertTailList(&missedPackets, &elem->link);
1490 OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, 1);
1491 dropReason = L"OVS-Completed since packet was copied to "
1492 L"userspace";
1493 } else {
1494 dropReason = L"OVS-Dropped due to failure to queue to "
1495 L"userspace";
1496 goto dropit;
1497 }
1498 break;
1499 }
1500 case OVS_ACTION_ATTR_SET:
1501 {
1502 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1503 || ovsFwdCtx.tunnelRxNic != NULL) {
1504 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1505 if (status != NDIS_STATUS_SUCCESS) {
1506 dropReason = L"OVS-adding destination failed";
1507 goto dropit;
1508 }
1509 }
1510
1511 status = OvsExecuteSetAction(&ovsFwdCtx, key, hash,
1512 (const PNL_ATTR)NlAttrGet
1513 ((const PNL_ATTR)a));
1514 if (status != NDIS_STATUS_SUCCESS) {
1515 dropReason = L"OVS-set action failed";
1516 goto dropit;
1517 }
1518 break;
1519 }
1520 case OVS_ACTION_ATTR_SAMPLE:
1521 break;
1522 case OVS_ACTION_ATTR_UNSPEC:
1523 case __OVS_ACTION_ATTR_MAX:
1524 default:
1525 break;
1526 }
1527 }
1528
1529 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1530 || ovsFwdCtx.tunnelRxNic != NULL) {
1531 status = OvsOutputForwardingCtx(&ovsFwdCtx);
1532 ASSERT(ovsFwdCtx.curNbl == NULL);
1533 }
1534
1535 ASSERT(ovsFwdCtx.destPortsSizeOut == 0);
1536 ASSERT(ovsFwdCtx.tunnelRxNic == NULL);
1537 ASSERT(ovsFwdCtx.tunnelTxNic == NULL);
1538
1539 dropit:
1540 /*
1541 * If curNbl != NULL, it implies the NBL has not been not freed up so far.
1542 */
1543 if (ovsFwdCtx.curNbl) {
1544 OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason);
1545 }
1546
1547 return status;
1548 }