]> git.proxmox.com Git - mirror_ovs.git/blob - datapath-windows/ovsext/Conntrack.c
datapath-windows: Correct endianness for deleting zone.
[mirror_ovs.git] / datapath-windows / ovsext / Conntrack.c
1 /*
2 * Copyright (c) 2015, 2016 VMware, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "Conntrack.h"
18 #include "IpFragment.h"
19 #include "Jhash.h"
20 #include "PacketParser.h"
21 #include "Event.h"
22 #include "Conntrack-nat.h"
23
24 #pragma warning(push)
25 #pragma warning(disable:4311)
26
27 #define WINDOWS_TICK 10000000
28 #define SEC_TO_UNIX_EPOCH 11644473600LL
29 #define SEC_TO_NANOSEC 1000000000LL
30
31 KSTART_ROUTINE OvsConntrackEntryCleaner;
32 static PLIST_ENTRY ovsConntrackTable;
33 static OVS_CT_THREAD_CTX ctThreadCtx;
34 static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
35 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
36 static UINT64 ctTotalEntries;
37
38 static __inline NDIS_STATUS OvsCtFlush(UINT16 zone);
39
40 /*
41 *----------------------------------------------------------------------------
42 * OvsInitConntrack
43 * Initialize the components used by Connection Tracking
44 *----------------------------------------------------------------------------
45 */
46 NTSTATUS
47 OvsInitConntrack(POVS_SWITCH_CONTEXT context)
48 {
49 NTSTATUS status;
50 HANDLE threadHandle = NULL;
51 ctTotalEntries = 0;
52
53 /* Init the sync-lock */
54 ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
55 if (ovsConntrackLockObj == NULL) {
56 return STATUS_INSUFFICIENT_RESOURCES;
57 }
58
59 /* Init the Hash Buffer */
60 ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
61 * CT_HASH_TABLE_SIZE,
62 OVS_CT_POOL_TAG);
63 if (ovsConntrackTable == NULL) {
64 NdisFreeRWLock(ovsConntrackLockObj);
65 ovsConntrackLockObj = NULL;
66 return STATUS_INSUFFICIENT_RESOURCES;
67 }
68
69 for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
70 InitializeListHead(&ovsConntrackTable[i]);
71 }
72
73 /* Init CT Cleaner Thread */
74 KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE);
75 status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
76 NULL, OvsConntrackEntryCleaner,
77 &ctThreadCtx);
78
79 if (status != STATUS_SUCCESS) {
80 NdisFreeRWLock(ovsConntrackLockObj);
81 ovsConntrackLockObj = NULL;
82
83 OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
84 ovsConntrackTable = NULL;
85
86 return status;
87 }
88
89 ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode,
90 &ctThreadCtx.threadObject, NULL);
91 ZwClose(threadHandle);
92 threadHandle = NULL;
93
94 status = OvsNatInit();
95
96 if (status != STATUS_SUCCESS) {
97 OvsCleanupConntrack();
98 return status;
99 }
100 return STATUS_SUCCESS;
101 }
102
103 /*
104 *----------------------------------------------------------------------------
105 * OvsCleanupConntrack
106 * Cleanup memory and thread that were spawned for Connection tracking
107 *----------------------------------------------------------------------------
108 */
109 VOID
110 OvsCleanupConntrack(VOID)
111 {
112 LOCK_STATE_EX lockState;
113 NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
114 ctThreadCtx.exit = 1;
115 KeSetEvent(&ctThreadCtx.event, 0, FALSE);
116 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
117
118 KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
119 KernelMode, FALSE, NULL);
120 ObDereferenceObject(ctThreadCtx.threadObject);
121
122 /* Force flush all entries before removing */
123 OvsCtFlush(0);
124
125 if (ovsConntrackTable) {
126 OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
127 ovsConntrackTable = NULL;
128 }
129
130 NdisFreeRWLock(ovsConntrackLockObj);
131 ovsConntrackLockObj = NULL;
132 OvsNatCleanup();
133 }
134
135 static __inline VOID
136 OvsCtKeyReverse(OVS_CT_KEY *key)
137 {
138 struct ct_endpoint tmp;
139 tmp = key->src;
140 key->src = key->dst;
141 key->dst = tmp;
142 }
143
144 static __inline VOID
145 OvsCtUpdateFlowKey(struct OvsFlowKey *key,
146 UINT32 state,
147 UINT16 zone,
148 UINT32 mark,
149 struct ovs_key_ct_labels *labels)
150 {
151 key->ct.state = state | OVS_CS_F_TRACKED;
152 key->ct.zone = zone;
153 key->ct.mark = mark;
154 if (labels) {
155 NdisMoveMemory(&key->ct.labels, labels,
156 sizeof(struct ovs_key_ct_labels));
157 } else {
158 memset(&key->ct.labels, 0,
159 sizeof(struct ovs_key_ct_labels));
160 }
161 }
162
163 static __inline VOID
164 OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
165 {
166 OVS_CT_EVENT_ENTRY ctEventEntry = {0};
167 NdisMoveMemory(&ctEventEntry.entry, entry, sizeof(OVS_CT_ENTRY));
168 ctEventEntry.type = type;
169 OvsPostCtEvent(&ctEventEntry);
170 }
171
172 static __inline VOID
173 OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
174 {
175 if (reply) {
176 entry->rev_key.byteCount+= OvsPacketLenNBL(nbl);
177 entry->rev_key.packetCount++;
178 } else {
179 entry->key.byteCount += OvsPacketLenNBL(nbl);
180 entry->key.packetCount++;
181 }
182 }
183
184 static __inline BOOLEAN
185 OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx,
186 PNAT_ACTION_INFO natInfo, UINT64 now)
187 {
188 NdisMoveMemory(&entry->key, &ctx->key, sizeof(OVS_CT_KEY));
189 NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof(OVS_CT_KEY));
190 OvsCtKeyReverse(&entry->rev_key);
191
192 /* NatInfo is always initialized to be disabled, so that if NAT action
193 * fails, we will not end up deleting an non-existent NAT entry.
194 */
195 if (natInfo == NULL) {
196 entry->natInfo.natAction = NAT_ACTION_NONE;
197 } else {
198 if (OvsIsForwardNat(natInfo->natAction)) {
199 entry->natInfo = *natInfo;
200 if (!OvsNatTranslateCtEntry(entry)) {
201 return FALSE;
202 }
203 ctx->hash = OvsHashCtKey(&entry->key);
204 } else {
205 entry->natInfo.natAction = natInfo->natAction;
206 }
207 }
208
209 entry->timestampStart = now;
210 InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
211 &entry->link);
212
213 ctTotalEntries++;
214 return TRUE;
215 }
216
217 static __inline POVS_CT_ENTRY
218 OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
219 UINT8 ipProto,
220 UINT32 l4Offset,
221 OvsConntrackKeyLookupCtx *ctx,
222 OvsFlowKey *key,
223 PNAT_ACTION_INFO natInfo,
224 BOOLEAN commit,
225 UINT64 currentTime,
226 BOOLEAN *entryCreated)
227 {
228 POVS_CT_ENTRY entry = NULL;
229 UINT32 state = 0;
230 POVS_CT_ENTRY parentEntry;
231 PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
232
233 *entryCreated = FALSE;
234 state |= OVS_CS_F_NEW;
235
236 parentEntry = OvsCtRelatedLookup(ctx->key, currentTime);
237 if (parentEntry != NULL) {
238 state |= OVS_CS_F_RELATED;
239 }
240
241 switch (ipProto) {
242 case IPPROTO_TCP:
243 {
244 TCPHdr tcpStorage;
245 const TCPHdr *tcp;
246 tcp = OvsGetTcp(curNbl, l4Offset, &tcpStorage);
247 if (!OvsConntrackValidateTcpPacket(tcp)) {
248 state = OVS_CS_F_INVALID;
249 break;
250 }
251
252 if (commit) {
253 entry = OvsConntrackCreateTcpEntry(tcp, curNbl, currentTime);
254 }
255 break;
256 }
257 case IPPROTO_ICMP:
258 {
259 ICMPHdr storage;
260 const ICMPHdr *icmp;
261 icmp = OvsGetIcmp(curNbl, l4Offset, &storage);
262 if (!OvsConntrackValidateIcmpPacket(icmp)) {
263 state = OVS_CS_F_INVALID;
264 break;
265 }
266
267 if (commit) {
268 entry = OvsConntrackCreateIcmpEntry(currentTime);
269 }
270 break;
271 }
272 case IPPROTO_UDP:
273 {
274 if (commit) {
275 entry = OvsConntrackCreateOtherEntry(currentTime);
276 }
277 break;
278 }
279 default:
280 state = OVS_CS_F_INVALID;
281 break;
282 }
283
284 if (state != OVS_CS_F_INVALID && commit) {
285 if (entry) {
286 entry->parent = parentEntry;
287 if (OvsCtAddEntry(entry, ctx, natInfo, currentTime)) {
288 *entryCreated = TRUE;
289 } else {
290 /* Unable to add entry to the list */
291 OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
292 state = OVS_CS_F_INVALID;
293 entry = NULL;
294 }
295 } else {
296 /* OvsAllocateMemoryWithTag returned NULL; treat as invalid */
297 state = OVS_CS_F_INVALID;
298 }
299 }
300
301 OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
302 if (entry) {
303 OvsCtIncrementCounters(entry, ctx->reply, curNbl);
304 }
305 return entry;
306 }
307
308 static enum CT_UPDATE_RES
309 OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
310 PNET_BUFFER_LIST nbl,
311 UINT8 ipProto,
312 UINT32 l4Offset,
313 BOOLEAN reply,
314 UINT64 now)
315 {
316 switch (ipProto) {
317 case IPPROTO_TCP:
318 {
319 TCPHdr tcpStorage;
320 const TCPHdr *tcp;
321 tcp = OvsGetTcp(nbl, l4Offset, &tcpStorage);
322 if (!tcp) {
323 return CT_UPDATE_INVALID;
324 }
325 return OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);
326 }
327 case IPPROTO_ICMP:
328 return OvsConntrackUpdateIcmpEntry(entry, reply, now);
329 case IPPROTO_UDP:
330 return OvsConntrackUpdateOtherEntry(entry, reply, now);
331 default:
332 return CT_UPDATE_INVALID;
333 }
334 }
335
336 static __inline VOID
337 OvsCtEntryDelete(POVS_CT_ENTRY entry)
338 {
339 if (entry == NULL) {
340 return;
341 }
342 if (entry->natInfo.natAction) {
343 OvsNatDeleteKey(&entry->key);
344 }
345 OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
346 RemoveEntryList(&entry->link);
347 OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
348 ctTotalEntries--;
349 }
350
351 static __inline BOOLEAN
352 OvsCtEntryExpired(POVS_CT_ENTRY entry)
353 {
354 UINT64 currentTime;
355 NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
356 return entry->expiration < currentTime;
357 }
358
359 static __inline NDIS_STATUS
360 OvsDetectCtPacket(OvsForwardingContext *fwdCtx,
361 OvsFlowKey *key,
362 PNET_BUFFER_LIST *newNbl)
363 {
364 /* Currently we support only Unfragmented TCP packets */
365 switch (ntohs(key->l2.dlType)) {
366 case ETH_TYPE_IPV4:
367 if (key->ipKey.nwFrag != OVS_FRAG_TYPE_NONE) {
368 return OvsProcessIpv4Fragment(fwdCtx->switchContext,
369 &fwdCtx->curNbl,
370 fwdCtx->completionList,
371 fwdCtx->fwdDetail->SourcePortId,
372 key->tunKey.tunnelId,
373 newNbl);
374 }
375 if (key->ipKey.nwProto == IPPROTO_TCP
376 || key->ipKey.nwProto == IPPROTO_UDP
377 || key->ipKey.nwProto == IPPROTO_ICMP) {
378 return NDIS_STATUS_SUCCESS;
379 }
380 return NDIS_STATUS_NOT_SUPPORTED;
381 case ETH_TYPE_IPV6:
382 return NDIS_STATUS_NOT_SUPPORTED;
383 }
384
385 return NDIS_STATUS_NOT_SUPPORTED;
386 }
387
388 BOOLEAN
389 OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
390 {
391 return ((NdisEqualMemory(&ctxKey.src, &entryKey.src,
392 sizeof(struct ct_endpoint))) &&
393 (NdisEqualMemory(&ctxKey.dst, &entryKey.dst,
394 sizeof(struct ct_endpoint))) &&
395 (ctxKey.dl_type == entryKey.dl_type) &&
396 (ctxKey.nw_proto == entryKey.nw_proto) &&
397 (ctxKey.zone == entryKey.zone));
398 }
399
400 POVS_CT_ENTRY
401 OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
402 {
403 PLIST_ENTRY link;
404 POVS_CT_ENTRY entry;
405 BOOLEAN reply = FALSE;
406 POVS_CT_ENTRY found = NULL;
407
408 /* Reverse NAT must be performed before OvsCtLookup, so here
409 * we simply need to flip the src and dst in key and compare
410 * they are equal. Note that flipped key is not equal to
411 * rev_key due to NAT effect.
412 */
413 OVS_CT_KEY revCtxKey = ctx->key;
414 OvsCtKeyReverse(&revCtxKey);
415
416 if (!ctTotalEntries) {
417 return found;
418 }
419
420 LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) {
421 entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
422
423 if (OvsCtKeyAreSame(ctx->key, entry->key)) {
424 found = entry;
425 reply = FALSE;
426 break;
427 }
428
429 if (OvsCtKeyAreSame(revCtxKey, entry->key)) {
430 found = entry;
431 reply = TRUE;
432 break;
433 }
434 }
435
436 if (found) {
437 if (OvsCtEntryExpired(found)) {
438 found = NULL;
439 } else {
440 ctx->reply = reply;
441 }
442 }
443
444 ctx->entry = found;
445 return found;
446 }
447
448 UINT32
449 OvsHashCtKey(const OVS_CT_KEY *key)
450 {
451 UINT32 hsrc, hdst, hash;
452 hsrc = OvsJhashBytes((UINT32*) &key->src, sizeof(key->src), 0);
453 hdst = OvsJhashBytes((UINT32*) &key->dst, sizeof(key->dst), 0);
454 hash = hsrc ^ hdst; /* TO identify reverse traffic */
455 hash = OvsJhashBytes((uint32_t *) &key->dst + 1,
456 ((uint32_t *) (key + 1) -
457 (uint32_t *) (&key->dst + 1)),
458 hash);
459 return hash;
460 }
461
462 static UINT8
463 OvsReverseIcmpType(UINT8 type)
464 {
465 switch (type) {
466 case ICMP4_ECHO_REQUEST:
467 return ICMP4_ECHO_REPLY;
468 case ICMP4_ECHO_REPLY:
469 return ICMP4_ECHO_REQUEST;
470 case ICMP4_TIMESTAMP_REQUEST:
471 return ICMP4_TIMESTAMP_REPLY;
472 case ICMP4_TIMESTAMP_REPLY:
473 return ICMP4_TIMESTAMP_REQUEST;
474 case ICMP4_INFO_REQUEST:
475 return ICMP4_INFO_REPLY;
476 case ICMP4_INFO_REPLY:
477 return ICMP4_INFO_REQUEST;
478 default:
479 return 0;
480 }
481 }
482
483 static __inline NDIS_STATUS
484 OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
485 UINT16 zone,
486 OvsConntrackKeyLookupCtx *ctx,
487 PNET_BUFFER_LIST curNbl,
488 UINT32 l4Offset)
489 {
490 const OVS_NAT_ENTRY *natEntry;
491 ctx->key.zone = zone;
492 ctx->key.dl_type = flowKey->l2.dlType;
493 ctx->related = FALSE;
494
495 /* Extract L3 and L4*/
496 if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) {
497 ctx->key.src.addr.ipv4 = flowKey->ipKey.nwSrc;
498 ctx->key.dst.addr.ipv4 = flowKey->ipKey.nwDst;
499 ctx->key.nw_proto = flowKey->ipKey.nwProto;
500
501 ctx->key.src.port = flowKey->ipKey.l4.tpSrc;
502 ctx->key.dst.port = flowKey->ipKey.l4.tpDst;
503 if (flowKey->ipKey.nwProto == IPPROTO_ICMP) {
504 ICMPHdr icmpStorage;
505 const ICMPHdr *icmp;
506 icmp = OvsGetIcmp(curNbl, l4Offset, &icmpStorage);
507 ASSERT(icmp);
508
509 /* Related bit is set when ICMP has an error */
510 /* XXX parse out the appropriate src and dst from inner pkt */
511 switch (icmp->type) {
512 case ICMP4_ECHO_REQUEST:
513 case ICMP4_ECHO_REPLY:
514 case ICMP4_TIMESTAMP_REQUEST:
515 case ICMP4_TIMESTAMP_REPLY:
516 case ICMP4_INFO_REQUEST:
517 case ICMP4_INFO_REPLY:
518 if (icmp->code != 0) {
519 return NDIS_STATUS_INVALID_PACKET;
520 }
521 /* Separate ICMP connection: identified using id */
522 ctx->key.dst.icmp_id = icmp->fields.echo.id;
523 ctx->key.src.icmp_id = icmp->fields.echo.id;
524 ctx->key.src.icmp_type = icmp->type;
525 ctx->key.dst.icmp_type = OvsReverseIcmpType(icmp->type);
526 break;
527 case ICMP4_DEST_UNREACH:
528 case ICMP4_TIME_EXCEEDED:
529 case ICMP4_PARAM_PROB:
530 case ICMP4_SOURCE_QUENCH:
531 case ICMP4_REDIRECT: {
532 /* XXX Handle inner packet */
533 ctx->related = TRUE;
534 break;
535 }
536 default:
537 ctx->related = FALSE;
538 }
539 }
540 } else if (flowKey->l2.dlType == htons(ETH_TYPE_IPV6)) {
541 ctx->key.src.addr.ipv6 = flowKey->ipv6Key.ipv6Src;
542 ctx->key.dst.addr.ipv6 = flowKey->ipv6Key.ipv6Dst;
543 ctx->key.nw_proto = flowKey->ipv6Key.nwProto;
544
545 ctx->key.src.port = flowKey->ipv6Key.l4.tpSrc;
546 ctx->key.dst.port = flowKey->ipv6Key.l4.tpDst;
547 /* XXX Handle ICMPv6 errors*/
548 } else {
549 return NDIS_STATUS_INVALID_PACKET;
550 }
551
552 natEntry = OvsNatLookup(&ctx->key, TRUE);
553 if (natEntry) {
554 /* Translate address first for reverse NAT */
555 ctx->key = natEntry->ctEntry->key;
556 OvsCtKeyReverse(&ctx->key);
557 }
558
559 ctx->hash = OvsHashCtKey(&ctx->key);
560 return NDIS_STATUS_SUCCESS;
561 }
562
563 static __inline BOOLEAN
564 OvsDetectFtpPacket(OvsFlowKey *key) {
565 return (key->ipKey.nwProto == IPPROTO_TCP &&
566 (ntohs(key->ipKey.l4.tpDst) == IPPORT_FTP ||
567 ntohs(key->ipKey.l4.tpSrc) == IPPORT_FTP));
568 }
569
570 /*
571 *----------------------------------------------------------------------------
572 * OvsProcessConntrackEntry
573 * Check the TCP flags and set the ct_state of the entry
574 *----------------------------------------------------------------------------
575 */
576 static __inline POVS_CT_ENTRY
577 OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
578 UINT32 l4Offset,
579 OvsConntrackKeyLookupCtx *ctx,
580 OvsFlowKey *key,
581 UINT16 zone,
582 NAT_ACTION_INFO *natInfo,
583 BOOLEAN commit,
584 UINT64 currentTime,
585 BOOLEAN *entryCreated)
586 {
587 POVS_CT_ENTRY entry = ctx->entry;
588 UINT32 state = 0;
589 PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
590 *entryCreated = FALSE;
591
592 /* If an entry was found, update the state based on TCP flags */
593 if (ctx->related) {
594 state |= OVS_CS_F_RELATED;
595 if (ctx->reply) {
596 state |= OVS_CS_F_REPLY_DIR;
597 }
598 } else {
599 CT_UPDATE_RES result;
600 result = OvsCtUpdateEntry(entry, curNbl, key->ipKey.nwProto,
601 l4Offset, ctx->reply, currentTime);
602 switch (result) {
603 case CT_UPDATE_VALID:
604 state |= OVS_CS_F_ESTABLISHED;
605 if (ctx->reply) {
606 state |= OVS_CS_F_REPLY_DIR;
607 }
608 break;
609 case CT_UPDATE_INVALID:
610 state |= OVS_CS_F_INVALID;
611 break;
612 case CT_UPDATE_NEW:
613 //Delete and update the Conntrack
614 OvsCtEntryDelete(ctx->entry);
615 ctx->entry = NULL;
616 entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
617 ctx, key, natInfo, commit, currentTime,
618 entryCreated);
619 if (!entry) {
620 return NULL;
621 }
622 break;
623 }
624 }
625
626 if (key->ipKey.nwProto == IPPROTO_TCP && entry) {
627 /* Update the related bit if there is a parent */
628 if (entry->parent) {
629 state |= OVS_CS_F_RELATED;
630 } else {
631 POVS_CT_ENTRY parentEntry;
632 parentEntry = OvsCtRelatedLookup(ctx->key, currentTime);
633 entry->parent = parentEntry;
634 if (parentEntry != NULL) {
635 state |= OVS_CS_F_RELATED;
636 }
637 }
638 }
639
640 /* Copy mark and label from entry into flowKey. If actions specify
641 different mark and label, update the flowKey. */
642 if (entry != NULL) {
643 OvsCtUpdateFlowKey(key, state, zone, entry->mark, &entry->labels);
644 } else {
645 OvsCtUpdateFlowKey(key, state, zone, 0, NULL);
646 }
647 return entry;
648 }
649
650 static __inline VOID
651 OvsConntrackSetMark(OvsFlowKey *key,
652 POVS_CT_ENTRY entry,
653 UINT32 value,
654 UINT32 mask,
655 BOOLEAN *markChanged)
656 {
657 UINT32 newMark;
658 newMark = value | (entry->mark & ~(mask));
659 if (entry->mark != newMark) {
660 entry->mark = newMark;
661 key->ct.mark = newMark;
662 *markChanged = TRUE;
663 }
664 }
665
666 static __inline void
667 OvsConntrackSetLabels(OvsFlowKey *key,
668 POVS_CT_ENTRY entry,
669 struct ovs_key_ct_labels *val,
670 struct ovs_key_ct_labels *mask,
671 BOOLEAN *labelChanged)
672 {
673 ovs_u128 v, m, pktMdLabel = {0};
674 memcpy(&v, val, sizeof v);
675 memcpy(&m, mask, sizeof m);
676
677 pktMdLabel.u64.lo = v.u64.lo | (pktMdLabel.u64.lo & ~(m.u64.lo));
678 pktMdLabel.u64.hi = v.u64.hi | (pktMdLabel.u64.hi & ~(m.u64.hi));
679
680 if (!NdisEqualMemory(&entry->labels, &pktMdLabel,
681 sizeof(struct ovs_key_ct_labels))) {
682 *labelChanged = TRUE;
683 }
684 NdisMoveMemory(&entry->labels, &pktMdLabel,
685 sizeof(struct ovs_key_ct_labels));
686 NdisMoveMemory(&key->ct.labels, &pktMdLabel,
687 sizeof(struct ovs_key_ct_labels));
688 }
689
690 static __inline NDIS_STATUS
691 OvsCtExecute_(OvsForwardingContext *fwdCtx,
692 OvsFlowKey *key,
693 OVS_PACKET_HDR_INFO *layers,
694 BOOLEAN commit,
695 BOOLEAN force,
696 UINT16 zone,
697 MD_MARK *mark,
698 MD_LABELS *labels,
699 PCHAR helper,
700 PNAT_ACTION_INFO natInfo,
701 BOOLEAN postUpdateEvent)
702 {
703 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
704 BOOLEAN triggerUpdateEvent = FALSE;
705 POVS_CT_ENTRY entry = NULL;
706 PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
707 OvsConntrackKeyLookupCtx ctx = { 0 };
708 LOCK_STATE_EX lockState;
709 UINT64 currentTime;
710 NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
711
712
713 /* Retrieve the Conntrack Key related fields from packet */
714 OvsCtSetupLookupCtx(key, zone, &ctx, curNbl, layers->l4Offset);
715
716 NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
717
718 /* Lookup Conntrack entries for a matching entry */
719 entry = OvsCtLookup(&ctx);
720 BOOLEAN entryCreated = FALSE;
721
722 /* Delete entry in reverse direction if 'force' is specified */
723 if (entry && force && ctx.reply) {
724 OvsCtEntryDelete(entry);
725 entry = NULL;
726 }
727
728 if (!entry && commit && ctTotalEntries >= CT_MAX_ENTRIES) {
729 /* Don't proceed with processing if the max limit has been hit.
730 * This blocks only new entries from being created and doesn't
731 * affect existing connections.
732 */
733 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
734 OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries);
735 return NDIS_STATUS_RESOURCES;
736 }
737
738 /* Increment the counters soon after the lookup, since we set ct.state
739 * to OVS_CS_F_TRACKED after processing the ct entry.
740 */
741 if (entry && (!(key->ct.state & OVS_CS_F_TRACKED))) {
742 OvsCtIncrementCounters(entry, ctx.reply, curNbl);
743 }
744
745 if (!entry) {
746 /* If no matching entry was found, create one and add New state */
747 entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
748 layers->l4Offset, &ctx,
749 key, natInfo, commit, currentTime,
750 &entryCreated);
751 } else {
752 /* Process the entry and update CT flags */
753 entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
754 zone, natInfo, commit, currentTime,
755 &entryCreated);
756 }
757
758 /*
759 * Note that natInfo is not the same as entry->natInfo here. natInfo
760 * is decided by action in the openflow rule, entry->natInfo is decided
761 * when the entry is created. In the reverse NAT case, natInfo is
762 * NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or
763 * NAT_ACTION_DST without NAT_ACTION_REVERSE
764 */
765 if (entry && natInfo->natAction != NAT_ACTION_NONE)
766 {
767 OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
768 key, ctx.reply);
769 }
770
771 if (entry && mark) {
772 OvsConntrackSetMark(key, entry, mark->value, mark->mask,
773 &triggerUpdateEvent);
774 }
775
776 if (entry && labels) {
777 OvsConntrackSetLabels(key, entry, &labels->value, &labels->mask,
778 &triggerUpdateEvent);
779 }
780
781 if (entry && OvsDetectFtpPacket(key)) {
782 /* FTP parser will always be loaded */
783 UNREFERENCED_PARAMETER(helper);
784
785 status = OvsCtHandleFtp(curNbl, key, layers, currentTime, entry,
786 (ntohs(key->ipKey.l4.tpDst) == IPPORT_FTP));
787 if (status != NDIS_STATUS_SUCCESS) {
788 OVS_LOG_ERROR("Error while parsing the FTP packet");
789 }
790 }
791
792 /* Add original tuple information to flow Key */
793 if (entry && entry->key.dl_type == ntohs(ETH_TYPE_IPV4)) {
794 OVS_CT_KEY *ctKey;
795 if (entry->parent != NULL) {
796 POVS_CT_ENTRY parent = entry->parent;
797 ctKey = &parent->key;
798 } else {
799 ctKey = &entry->key;
800 }
801
802 key->ct.tuple_ipv4.ipv4_src = ctKey->src.addr.ipv4_aligned;
803 key->ct.tuple_ipv4.ipv4_dst = ctKey->dst.addr.ipv4_aligned;
804 key->ct.tuple_ipv4.ipv4_proto = ctKey->nw_proto;
805
806 /* Orig tuple Port is overloaded to take in ICMP-Type & Code */
807 /* This mimics the behavior in lib/conntrack.c*/
808 key->ct.tuple_ipv4.src_port = ctKey->nw_proto != IPPROTO_ICMP ?
809 ctKey->src.port :
810 htons(ctKey->src.icmp_type);
811 key->ct.tuple_ipv4.dst_port = ctKey->nw_proto != IPPROTO_ICMP ?
812 ctKey->dst.port :
813 htons(ctKey->src.icmp_code);
814 }
815
816 if (entryCreated && entry) {
817 OvsPostCtEventEntry(entry, OVS_EVENT_CT_NEW);
818 }
819 if (postUpdateEvent && entry && !entryCreated && triggerUpdateEvent) {
820 OvsPostCtEventEntry(entry, OVS_EVENT_CT_UPDATE);
821 }
822
823 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
824
825 return status;
826 }
827
828 /*
829 *---------------------------------------------------------------------------
830 * OvsExecuteConntrackAction
831 * Executes Conntrack actions XXX - Add more
832 * For the Ipv4 fragments, consume the orginal fragment NBL
833 *---------------------------------------------------------------------------
834 */
835 NDIS_STATUS
836 OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
837 OvsFlowKey *key,
838 const PNL_ATTR a)
839 {
840 PNL_ATTR ctAttr;
841 BOOLEAN commit = FALSE;
842 BOOLEAN force = FALSE;
843 BOOLEAN postUpdateEvent = FALSE;
844 UINT16 zone = 0;
845 UINT32 eventmask = 0;
846 MD_MARK *mark = NULL;
847 MD_LABELS *labels = NULL;
848 PCHAR helper = NULL;
849 NAT_ACTION_INFO natActionInfo;
850 OVS_PACKET_HDR_INFO *layers = &fwdCtx->layers;
851 PNET_BUFFER_LIST newNbl = NULL;
852 NDIS_STATUS status;
853
854 memset(&natActionInfo, 0, sizeof natActionInfo);
855 status = OvsDetectCtPacket(fwdCtx, key, &newNbl);
856 if (status != NDIS_STATUS_SUCCESS) {
857 return status;
858 }
859
860 /* XXX Convert this to NL_ATTR_FOR_EACH */
861 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE);
862 if (ctAttr) {
863 zone = NlAttrGetU16(ctAttr);
864 }
865 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT);
866 if (ctAttr) {
867 commit = TRUE;
868 }
869 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK);
870 if (ctAttr) {
871 mark = NlAttrGet(ctAttr);
872 }
873 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS);
874 if (ctAttr) {
875 labels = NlAttrGet(ctAttr);
876 }
877 natActionInfo.natAction = NAT_ACTION_NONE;
878 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_NAT);
879 if (ctAttr) {
880 /* Pares Nested NAT attributes. */
881 PNL_ATTR natAttr;
882 unsigned int left;
883 BOOLEAN hasMinIp = FALSE;
884 BOOLEAN hasMinPort = FALSE;
885 BOOLEAN hasMaxIp = FALSE;
886 BOOLEAN hasMaxPort = FALSE;
887 NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) {
888 enum ovs_nat_attr subtype = NlAttrType(natAttr);
889 switch(subtype) {
890 case OVS_NAT_ATTR_SRC:
891 case OVS_NAT_ATTR_DST:
892 natActionInfo.natAction |=
893 ((subtype == OVS_NAT_ATTR_SRC)
894 ? NAT_ACTION_SRC : NAT_ACTION_DST);
895 break;
896 case OVS_NAT_ATTR_IP_MIN:
897 memcpy(&natActionInfo.minAddr,
898 NlAttrData(natAttr), NlAttrGetSize(natAttr));
899 hasMinIp = TRUE;
900 break;
901 case OVS_NAT_ATTR_IP_MAX:
902 memcpy(&natActionInfo.maxAddr,
903 NlAttrData(natAttr), NlAttrGetSize(natAttr));
904 hasMaxIp = TRUE;
905 break;
906 case OVS_NAT_ATTR_PROTO_MIN:
907 natActionInfo.minPort = NlAttrGetU16(natAttr);
908 hasMinPort = TRUE;
909 break;
910 case OVS_NAT_ATTR_PROTO_MAX:
911 natActionInfo.maxPort = NlAttrGetU16(natAttr);
912 hasMaxPort = TRUE;
913 break;
914 case OVS_NAT_ATTR_PERSISTENT:
915 case OVS_NAT_ATTR_PROTO_HASH:
916 case OVS_NAT_ATTR_PROTO_RANDOM:
917 break;
918 }
919 }
920 if (natActionInfo.natAction == NAT_ACTION_NONE) {
921 natActionInfo.natAction = NAT_ACTION_REVERSE;
922 }
923 if (hasMinIp && !hasMaxIp) {
924 memcpy(&natActionInfo.maxAddr,
925 &natActionInfo.minAddr,
926 sizeof(natActionInfo.maxAddr));
927 }
928 if (hasMinPort && !hasMaxPort) {
929 natActionInfo.maxPort = natActionInfo.minPort;
930 }
931 if (hasMinPort || hasMaxPort) {
932 if (natActionInfo.natAction & NAT_ACTION_SRC) {
933 natActionInfo.natAction |= NAT_ACTION_SRC_PORT;
934 } else if (natActionInfo.natAction & NAT_ACTION_DST) {
935 natActionInfo.natAction |= NAT_ACTION_DST_PORT;
936 }
937 }
938 }
939 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_HELPER);
940 if (ctAttr) {
941 helper = NlAttrGetString(ctAttr);
942 if (helper == NULL) {
943 return NDIS_STATUS_INVALID_PARAMETER;
944 }
945 if (strcmp("ftp", helper) != 0) {
946 /* Only support FTP */
947 return NDIS_STATUS_NOT_SUPPORTED;
948 }
949 }
950 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_FORCE_COMMIT);
951 if (ctAttr) {
952 force = TRUE;
953 /* Force implicitly means commit */
954 commit = TRUE;
955 }
956 ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_EVENTMASK);
957 if (ctAttr) {
958 eventmask = NlAttrGetU32(ctAttr);
959 /* Only mark and label updates are supported. */
960 if (eventmask & (1 << IPCT_MARK | 1 << IPCT_LABEL))
961 postUpdateEvent = TRUE;
962 }
963 /* If newNbl is not allocated, use the current Nbl*/
964 status = OvsCtExecute_(fwdCtx, key, layers,
965 commit, force, zone, mark, labels, helper, &natActionInfo,
966 postUpdateEvent);
967 return status;
968 }
969
970 /*
971 *----------------------------------------------------------------------------
972 * OvsConntrackEntryCleaner
973 * Runs periodically and cleans up the connection tracker
974 *----------------------------------------------------------------------------
975 */
976 VOID
977 OvsConntrackEntryCleaner(PVOID data)
978 {
979
980 POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data;
981 PLIST_ENTRY link, next;
982 POVS_CT_ENTRY entry;
983 LOCK_STATE_EX lockState;
984 BOOLEAN success = TRUE;
985
986 while (success) {
987 if (ovsConntrackLockObj == NULL) {
988 /* Lock has been freed by 'OvsCleanupConntrack()' */
989 break;
990 }
991 NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
992 if (context->exit) {
993 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
994 break;
995 }
996
997 /* Set the timeout for the thread and cleanup */
998 INT64 threadSleepTimeout = -CT_CLEANUP_INTERVAL;
999
1000 if (ctTotalEntries) {
1001 for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
1002 LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
1003 entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
1004 if (entry && OvsCtEntryExpired(entry)) {
1005 OvsCtEntryDelete(entry);
1006 }
1007 }
1008 }
1009 }
1010 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
1011 KeWaitForSingleObject(&context->event, Executive, KernelMode,
1012 FALSE, (LARGE_INTEGER *)&threadSleepTimeout);
1013 }
1014
1015 PsTerminateSystemThread(STATUS_SUCCESS);
1016 }
1017
1018 /*
1019 *----------------------------------------------------------------------------
1020 * OvsCtFlush
1021 * Flushes out all Conntrack Entries that match the given zone
1022 *----------------------------------------------------------------------------
1023 */
1024 static __inline NDIS_STATUS
1025 OvsCtFlush(UINT16 zone)
1026 {
1027 PLIST_ENTRY link, next;
1028 POVS_CT_ENTRY entry;
1029
1030 LOCK_STATE_EX lockState;
1031 NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
1032
1033 if (ctTotalEntries) {
1034 for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
1035 LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
1036 entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
1037 /* zone is a non-zero value */
1038 if (!zone || zone == entry->key.zone)
1039 OvsCtEntryDelete(entry);
1040 }
1041 }
1042 }
1043
1044 OvsNatFlush(zone);
1045 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
1046 return NDIS_STATUS_SUCCESS;
1047 }
1048
1049 NTSTATUS
1050 OvsCtDeleteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1051 UINT32 *replyLen)
1052 {
1053 POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1054 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1055 PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
1056 PNL_ATTR ctAttrs[__CTA_MAX];
1057 UINT32 attrOffset = NLMSG_HDRLEN + NF_GEN_MSG_HDRLEN + OVS_HDRLEN;
1058 NL_ERROR nlError = NL_ERROR_SUCCESS;
1059 NTSTATUS status;
1060 UINT16 zone = 0;
1061 NL_BUFFER nlBuf;
1062 UINT16 nlmsgType;
1063 PNL_MSG_HDR nlMsg;
1064
1065 static const NL_POLICY ctZonePolicy[] = {
1066 [CTA_ZONE] = { .type = NL_A_BE16, .optional = TRUE },
1067 };
1068
1069 if ((NlAttrParse(nlMsgHdr, attrOffset, NlNfMsgAttrsLen(nlMsgHdr),
1070 ctZonePolicy, ARRAY_SIZE(ctZonePolicy),
1071 ctAttrs, ARRAY_SIZE(ctAttrs)))
1072 != TRUE) {
1073 OVS_LOG_ERROR("Zone attr parsing failed for msg: %p", nlMsgHdr);
1074 status = STATUS_INVALID_PARAMETER;
1075 goto done;
1076 }
1077
1078 if (ctAttrs[CTA_ZONE]) {
1079 zone = ntohs(NlAttrGetU16(ctAttrs[CTA_ZONE]));
1080 }
1081
1082 status = OvsCtFlush(zone);
1083 if (status == STATUS_SUCCESS) {
1084 nlmsgType = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_DELETE);
1085 NlBufInit(&nlBuf,
1086 usrParamsCtx->outputBuffer,
1087 usrParamsCtx->outputLength);
1088 if (!NlFillOvsMsgForNfGenMsg(&nlBuf, nlmsgType, NLM_F_CREATE,
1089 msgIn->nlMsg.nlmsgSeq,
1090 msgIn->nlMsg.nlmsgPid,
1091 AF_UNSPEC,
1092 msgIn->nfGenMsg.version,
1093 0)) {
1094 status = STATUS_INVALID_PARAMETER;
1095 }
1096 nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1097 nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1098 *replyLen = msgOut->nlMsg.nlmsgLen;
1099 }
1100
1101 done:
1102 nlError = NlMapStatusToNlErr(status);
1103 if (nlError != NL_ERROR_SUCCESS) {
1104 POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
1105 usrParamsCtx->outputBuffer;
1106
1107 ASSERT(msgError);
1108 NlBuildErrorMsg(msgIn, msgError, nlError, replyLen);
1109 ASSERT(*replyLen != 0);
1110 status = STATUS_SUCCESS;
1111 }
1112
1113 return status;
1114 }
1115
1116 static __inline NDIS_STATUS
1117 MapIpTupleToNl(PNL_BUFFER nlBuf, OVS_CT_KEY *key)
1118 {
1119 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1120 UINT32 offset = 0;
1121
1122 offset = NlMsgStartNested(nlBuf, CTA_TUPLE_IP);
1123 if (!offset) {
1124 return NDIS_STATUS_FAILURE;
1125 }
1126
1127 if (key->dl_type == ntohs(ETH_TYPE_IPV4)) {
1128 if (!NlMsgPutTailU32(nlBuf, CTA_IP_V4_SRC, key->src.addr.ipv4)) {
1129 status = NDIS_STATUS_FAILURE;
1130 goto done;
1131 }
1132 if (!NlMsgPutTailU32(nlBuf, CTA_IP_V4_DST, key->dst.addr.ipv4)) {
1133 status = NDIS_STATUS_FAILURE;
1134 goto done;
1135 }
1136 } else if (key->dl_type == ntohs(ETH_TYPE_IPV6)) {
1137 if (!NlMsgPutTailUnspec(nlBuf, CTA_IP_V6_SRC,
1138 (PCHAR)(&key->src.addr.ipv6),
1139 sizeof(key->src.addr.ipv6))) {
1140 status = NDIS_STATUS_FAILURE;
1141 goto done;
1142 }
1143 if (!NlMsgPutTailUnspec(nlBuf, CTA_IP_V6_DST,
1144 (PCHAR)(&key->dst.addr.ipv6),
1145 sizeof(key->dst.addr.ipv6))) {
1146 status = NDIS_STATUS_FAILURE;
1147 goto done;
1148 }
1149 }
1150
1151 done:
1152 NlMsgEndNested(nlBuf, offset);
1153 return status;
1154 }
1155
1156 static __inline NDIS_STATUS
1157 MapProtoTupleToNl(PNL_BUFFER nlBuf, OVS_CT_KEY *key)
1158 {
1159 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1160 UINT32 offset = 0;
1161
1162 offset = NlMsgStartNested(nlBuf, CTA_TUPLE_PROTO);
1163 if (!offset) {
1164 return NDIS_STATUS_FAILURE;
1165 }
1166
1167 if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_NUM, key->nw_proto)) {
1168 status = NDIS_STATUS_FAILURE;
1169 goto done;
1170 }
1171
1172 if (key->dl_type == ntohs(ETH_TYPE_IPV4)
1173 || key->dl_type == ntohs(ETH_TYPE_IPV6)) {
1174 /* ICMP and ICMPv6 Type, Code and ID are currently not tracked */
1175 if (key->nw_proto == IPPROTO_ICMP) {
1176 if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_ICMP_ID,
1177 htons(key->src.icmp_id))) {
1178 status = NDIS_STATUS_FAILURE;
1179 goto done;
1180 }
1181 if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMP_TYPE,
1182 key->src.icmp_type)) {
1183 status = NDIS_STATUS_FAILURE;
1184 goto done;
1185 }
1186 if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMP_CODE,
1187 key->src.icmp_code)) {
1188 status = NDIS_STATUS_FAILURE;
1189 goto done;
1190 }
1191 } else if (key->nw_proto == IPPROTO_ICMPV6) {
1192 if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_ICMPV6_ID, 0)) {
1193 status = NDIS_STATUS_FAILURE;
1194 goto done;
1195 }
1196 if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMPV6_TYPE, 0)) {
1197 status = NDIS_STATUS_FAILURE;
1198 goto done;
1199 }
1200 if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMPV6_CODE, 0)) {
1201 status = NDIS_STATUS_FAILURE;
1202 goto done;
1203 }
1204 } else if (key->nw_proto == IPPROTO_TCP
1205 || key->nw_proto == IPPROTO_UDP) {
1206 if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_SRC_PORT,
1207 key->src.port)) {
1208 status = NDIS_STATUS_FAILURE;
1209 goto done;
1210 }
1211 if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_DST_PORT,
1212 key->dst.port)) {
1213 status = NDIS_STATUS_FAILURE;
1214 goto done;
1215 }
1216 }
1217 }
1218
1219 done:
1220 NlMsgEndNested(nlBuf, offset);
1221 return status;
1222 }
1223
1224 static __inline NDIS_STATUS
1225 MapCtKeyTupleToNl(PNL_BUFFER nlBuf,
1226 UINT16 tupleType,
1227 OVS_CT_KEY *key)
1228 {
1229 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1230 UINT32 offset = 0;
1231
1232 offset = NlMsgStartNested(nlBuf, tupleType);
1233 if (!offset) {
1234 return NDIS_STATUS_FAILURE;
1235 }
1236
1237 status = MapIpTupleToNl(nlBuf, key);
1238 if (status != NDIS_STATUS_SUCCESS) {
1239 goto done;
1240 }
1241
1242 status = MapProtoTupleToNl(nlBuf, key);
1243 if (status != NDIS_STATUS_SUCCESS) {
1244 goto done;
1245 }
1246
1247 done:
1248 NlMsgEndNested(nlBuf, offset);
1249 return status;
1250 }
1251
1252 static __inline NDIS_STATUS
1253 MapCtCounterToNl(PNL_BUFFER nlBuf,
1254 UINT16 counterType,
1255 OVS_CT_KEY *key)
1256 {
1257 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1258 UINT32 offset = 0;
1259
1260 offset = NlMsgStartNested(nlBuf, counterType);
1261 if (!offset) {
1262 return NDIS_STATUS_FAILURE;
1263 }
1264
1265 if (!NlMsgPutTailU64(nlBuf, CTA_COUNTERS_PACKETS,
1266 htonll(key->packetCount))) {
1267 status = NDIS_STATUS_FAILURE;
1268 goto done;
1269 }
1270
1271 if (!NlMsgPutTailU64(nlBuf, CTA_COUNTERS_BYTES,
1272 htonll(key->byteCount))) {
1273 status = NDIS_STATUS_FAILURE;
1274 goto done;
1275 }
1276
1277 done:
1278 NlMsgEndNested(nlBuf, offset);
1279 return status;
1280 }
1281
1282 /* Userspace expects system time to be Unix timestamp in Nano Seconds */
1283 static __inline unsigned
1284 WindowsTickToUnixSeconds(long long windowsTicks)
1285 {
1286 /*
1287 * Windows epoch starts 1601-01-01T00:00:00Z. It's 11644473600 seconds
1288 * before the UNIX/Linux epoch (1970-01-01T00:00:00Z). Windows ticks are
1289 * in 100 nanoseconds
1290 */
1291 return (unsigned)((windowsTicks / WINDOWS_TICK
1292 - SEC_TO_UNIX_EPOCH));
1293 }
1294
1295 NTSTATUS
1296 OvsCreateNlMsgFromCtEntry(POVS_CT_ENTRY entry,
1297 PVOID outBuffer,
1298 UINT32 outBufLen,
1299 UINT8 eventType,
1300 UINT32 nlmsgSeq,
1301 UINT32 nlmsgPid,
1302 UINT8 nfGenVersion,
1303 UINT32 dpIfIndex)
1304 {
1305 NL_BUFFER nlBuf;
1306 BOOLEAN ok;
1307 PNL_MSG_HDR nlMsg;
1308 UINT32 timeout;
1309 NDIS_STATUS status;
1310 UINT64 currentTime, expiration;
1311 UINT16 nlmsgType;
1312 UINT16 nlmsgFlags = NLM_F_CREATE;
1313 NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
1314 UINT8 nfgenFamily = 0;
1315 if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
1316 nfgenFamily = AF_INET;
1317 } else if (entry->key.dl_type == htons(ETH_TYPE_IPV6)) {
1318 nfgenFamily = AF_INET6;
1319 }
1320
1321 NlBufInit(&nlBuf, outBuffer, outBufLen);
1322 /* Mimic netfilter */
1323 if (eventType == OVS_EVENT_CT_NEW || eventType == OVS_EVENT_CT_UPDATE) {
1324 nlmsgType = (UINT16) (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
1325 } else if (eventType == OVS_EVENT_CT_DELETE) {
1326 nlmsgType = (UINT16) (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_DELETE);
1327 } else {
1328 return STATUS_INVALID_PARAMETER;
1329 }
1330
1331 if (eventType == OVS_EVENT_CT_UPDATE) {
1332 /* In netlink-conntrack.c IPCTNL_MSG_CT_NEW msg type is used to
1333 * differentiate between OVS_EVENT_CT_NEW and OVS_EVENT_CT_UPDATE
1334 * events based on nlmsgFlags, unset it to notify an update event.
1335 */
1336 nlmsgFlags = 0;
1337 }
1338 ok = NlFillOvsMsgForNfGenMsg(&nlBuf, nlmsgType, nlmsgFlags,
1339 nlmsgSeq, nlmsgPid, nfgenFamily,
1340 nfGenVersion, dpIfIndex);
1341 if (!ok) {
1342 return STATUS_INVALID_BUFFER_SIZE;
1343 }
1344
1345 status = MapCtKeyTupleToNl(&nlBuf, CTA_TUPLE_ORIG, &entry->key);
1346 if (status != NDIS_STATUS_SUCCESS) {
1347 return STATUS_UNSUCCESSFUL;
1348 }
1349
1350 status = MapCtKeyTupleToNl(&nlBuf, CTA_TUPLE_REPLY, &entry->rev_key);
1351 if (status != NDIS_STATUS_SUCCESS) {
1352 return STATUS_UNSUCCESSFUL;
1353 }
1354
1355 status = MapCtCounterToNl(&nlBuf, CTA_COUNTERS_ORIG, &entry->key);
1356 if (status != NDIS_STATUS_SUCCESS) {
1357 return STATUS_UNSUCCESSFUL;
1358 }
1359
1360 status = MapCtCounterToNl(&nlBuf, CTA_COUNTERS_REPLY, &entry->rev_key);
1361 if (status != NDIS_STATUS_SUCCESS) {
1362 return STATUS_UNSUCCESSFUL;
1363 }
1364
1365 if (entry->key.zone) {
1366 if (!NlMsgPutTailU16(&nlBuf, CTA_ZONE, htons(entry->key.zone))) {
1367 return STATUS_INVALID_BUFFER_SIZE;
1368 }
1369 }
1370
1371 if (entry->mark) {
1372 if (!NlMsgPutTailU32(&nlBuf, CTA_MARK, htonl(entry->mark))) {
1373 return STATUS_INVALID_BUFFER_SIZE;
1374 }
1375 }
1376
1377 if (entry->labels.ct_labels) {
1378 ok = NlMsgPutTailUnspec(&nlBuf, CTA_LABELS,
1379 (PCHAR)(&entry->labels),
1380 sizeof(entry->labels));
1381 if (!ok) {
1382 return STATUS_INVALID_BUFFER_SIZE;
1383 }
1384 }
1385
1386 if (entry->expiration > currentTime) {
1387 expiration = entry->expiration - currentTime;
1388 timeout = (UINT32) (expiration / CT_INTERVAL_SEC);
1389 if (!NlMsgPutTailU32(&nlBuf, CTA_TIMEOUT, htonl(timeout))) {
1390 return STATUS_INVALID_BUFFER_SIZE;
1391 }
1392 }
1393
1394 if (entry->key.nw_proto == IPPROTO_TCP) {
1395 /* Add ProtoInfo for TCP */
1396 UINT32 offset;
1397 offset = NlMsgStartNested(&nlBuf, CTA_PROTOINFO);
1398 if (!offset) {
1399 return NDIS_STATUS_FAILURE;
1400 }
1401
1402 status = OvsCtMapTcpProtoInfoToNl(&nlBuf, entry);
1403 NlMsgEndNested(&nlBuf, offset);
1404 if (status != NDIS_STATUS_SUCCESS) {
1405 return STATUS_UNSUCCESSFUL;
1406 }
1407 }
1408
1409 /* CTA_STATUS is required but not implemented. Default to 0 */
1410 if (!NlMsgPutTailU32(&nlBuf, CTA_STATUS, 0)) {
1411 return STATUS_INVALID_BUFFER_SIZE;
1412 }
1413
1414 /* Mimic netfilter - nf_conntrack_netlink.c:
1415 *
1416 * int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) {
1417 * NLA_PUT_BE32(skb, CTA_ID, htonl((unsigned long)ct));
1418 * return 0;
1419 * }
1420 *
1421 */
1422 if(!NlMsgPutTailU32(&nlBuf, CTA_ID, htonl((UINT32) entry))) {
1423 return STATUS_INVALID_BUFFER_SIZE;
1424 }
1425
1426 if (entry->timestampStart) {
1427 UINT32 offset;
1428 offset = NlMsgStartNested(&nlBuf, CTA_TIMESTAMP);
1429 if (!offset) {
1430 return NDIS_STATUS_FAILURE;
1431 }
1432 UINT64 start;
1433 start = WindowsTickToUnixSeconds(entry->timestampStart);
1434 start = start * SEC_TO_NANOSEC;
1435 if (!NlMsgPutTailU64(&nlBuf, CTA_TIMESTAMP_START, htonll(start))) {
1436 NlMsgEndNested(&nlBuf, offset);
1437 return STATUS_INVALID_BUFFER_SIZE;
1438 }
1439
1440 NlMsgEndNested(&nlBuf, offset);
1441 }
1442
1443 nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1444 nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1445
1446 return STATUS_SUCCESS;
1447 }
1448
1449 /*
1450 *----------------------------------------------------------------------------
1451 * OvsCtDumpCmdHandler --
1452 * Handler for IPCTNL_MSG_CT_GET command.
1453 *
1454 * XXX - Try to consolidate dump handler patterns around dumpState usage
1455 * The following dumpHandler is similar to one vport.c uses
1456 *----------------------------------------------------------------------------
1457 */
1458 NTSTATUS
1459 OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1460 UINT32 *replyLen)
1461 {
1462 NTSTATUS rc;
1463 /* Setup Dump Start if it's OVS_WRITE_DEV_OP and return */
1464 if (usrParamsCtx->devOp == OVS_WRITE_DEV_OP) {
1465 *replyLen = 0;
1466 OvsSetupDumpStart(usrParamsCtx);
1467 return STATUS_SUCCESS;
1468 }
1469
1470 POVS_OPEN_INSTANCE instance =
1471 (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1472 POVS_MESSAGE msgIn;
1473
1474 ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
1475 if (instance->dumpState.ovsMsg == NULL) {
1476 ASSERT(FALSE);
1477 return STATUS_INVALID_DEVICE_STATE;
1478 }
1479
1480 /* Output buffer has been validated while validating read dev op. */
1481 ASSERT(usrParamsCtx->outputBuffer != NULL);
1482 msgIn = instance->dumpState.ovsMsg;
1483 UINT32 inBucket = instance->dumpState.index[0];
1484 UINT32 inIndex = instance->dumpState.index[1];
1485 UINT32 i = CT_HASH_TABLE_SIZE;
1486 UINT32 outIndex = 0;
1487
1488 LOCK_STATE_EX lockState;
1489 NdisAcquireRWLockRead(ovsConntrackLockObj, &lockState, 0);
1490
1491 if (ctTotalEntries) {
1492 for (i = inBucket; i < CT_HASH_TABLE_SIZE; i++) {
1493 PLIST_ENTRY head, link;
1494 head = &ovsConntrackTable[i];
1495 POVS_CT_ENTRY entry = NULL;
1496
1497 outIndex = 0;
1498 LIST_FORALL(head, link) {
1499 /*
1500 * if one or more dumps were previously done on this same
1501 * bucket, inIndex will be > 0, so we'll need to reply with
1502 * the inIndex + 1 ct-entry from the bucket.
1503 */
1504 if (outIndex >= inIndex) {
1505 entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
1506
1507 rc = OvsCreateNlMsgFromCtEntry(entry,
1508 usrParamsCtx->outputBuffer,
1509 usrParamsCtx->outputLength,
1510 OVS_EVENT_CT_NEW,
1511 msgIn->nlMsg.nlmsgSeq,
1512 msgIn->nlMsg.nlmsgPid,
1513 msgIn->nfGenMsg.version,
1514 0);
1515
1516 if (rc != NDIS_STATUS_SUCCESS) {
1517 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
1518 return STATUS_UNSUCCESSFUL;
1519 }
1520
1521 ++outIndex;
1522 break;
1523 }
1524
1525 ++outIndex;
1526 }
1527
1528 if (entry) {
1529 break;
1530 }
1531
1532 /*
1533 * if no ct-entry was found above, check the next bucket, beginning
1534 * with the first (i.e. index 0) elem from within that bucket
1535 */
1536 inIndex = 0;
1537 }
1538 }
1539 instance->dumpState.index[0] = i;
1540 instance->dumpState.index[1] = outIndex;
1541 NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
1542
1543 /* if i < CT_HASH_TABLE_SIZE => entry was found */
1544 if (i < CT_HASH_TABLE_SIZE) {
1545 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1546 *replyLen = msgOut->nlMsg.nlmsgLen;
1547 } else {
1548 /* if i >= CT_HASH_TABLE_SIZE => entry was not found => dump done */
1549 *replyLen = 0;
1550 FreeUserDumpState(instance);
1551 }
1552
1553 return STATUS_SUCCESS;
1554 }
1555
1556 #pragma warning(pop)